From cfba1808fdbd300b442cd7c811d4c8738948a529 Mon Sep 17 00:00:00 2001 From: Ponsuganth Ilangovan Ponkumar Ilango <pponkumar@geophysik.uni-muenchen.de> Date: Mon, 15 Jul 2024 19:46:56 +0200 Subject: [PATCH] Update generate.py and regenerate operators --- generate/generate.py | 9 +- generate/requirements.txt | 2 +- operators.toml | 6 +- operators/curl_curl/CMakeLists.txt | 16 +- .../curl_curl/N1E1ElementwiseCurlCurl.cpp | 9 +- .../curl_curl/N1E1ElementwiseCurlCurl.hpp | 122 +- ...pply_N1E1ElementwiseCurlCurl_macro_3D.cpp} | 6 +- ...lues_N1E1ElementwiseCurlCurl_macro_3D.cpp} | 6 +- ...pply_N1E1ElementwiseCurlCurl_macro_3D.cpp} | 6 +- ...lues_N1E1ElementwiseCurlCurl_macro_3D.cpp} | 6 +- ...trix_N1E1ElementwiseCurlCurl_macro_3D.cpp} | 6 +- operators/diffusion/CMakeLists.txt | 96 +- .../diffusion/P1ElementwiseDiffusion.cpp | 18 +- .../diffusion/P1ElementwiseDiffusion.hpp | 199 +- .../diffusion/P2ElementwiseDiffusion.cpp | 18 +- .../diffusion/P2ElementwiseDiffusion.hpp | 219 +- .../P2ElementwiseDiffusionAnnulusMap.cpp | 9 +- .../P2ElementwiseDiffusionAnnulusMap.hpp | 140 +- ...lementwiseDiffusionIcosahedralShellMap.cpp | 9 +- ...lementwiseDiffusionIcosahedralShellMap.hpp | 213 +- ...apply_P1ElementwiseDiffusion_macro_2D.cpp} | 6 +- ...apply_P1ElementwiseDiffusion_macro_3D.cpp} | 6 +- ...alues_P1ElementwiseDiffusion_macro_2D.cpp} | 6 +- ...alues_P1ElementwiseDiffusion_macro_3D.cpp} | 6 +- ...ementwiseDiffusionAnnulusMap_macro_2D.cpp} | 6 +- ...ementwiseDiffusionAnnulusMap_macro_2D.cpp} | 6 +- ...DiffusionIcosahedralShellMap_macro_3D.cpp} | 6 +- ...DiffusionIcosahedralShellMap_macro_3D.cpp} | 6 +- ...apply_P2ElementwiseDiffusion_macro_2D.cpp} | 6 +- ...apply_P2ElementwiseDiffusion_macro_3D.cpp} | 6 +- ...alues_P2ElementwiseDiffusion_macro_2D.cpp} | 6 +- ...alues_P2ElementwiseDiffusion_macro_3D.cpp} | 6 +- ...apply_P1ElementwiseDiffusion_macro_2D.cpp} | 6 +- ...apply_P1ElementwiseDiffusion_macro_3D.cpp} | 6 +- ...alues_P1ElementwiseDiffusion_macro_2D.cpp} | 6 +- ...alues_P1ElementwiseDiffusion_macro_3D.cpp} | 6 +- ...atrix_P1ElementwiseDiffusion_macro_2D.cpp} | 6 +- ...atrix_P1ElementwiseDiffusion_macro_3D.cpp} | 6 +- ...ementwiseDiffusionAnnulusMap_macro_2D.cpp} | 6 +- ...ementwiseDiffusionAnnulusMap_macro_2D.cpp} | 6 +- ...ementwiseDiffusionAnnulusMap_macro_2D.cpp} | 6 +- ...DiffusionIcosahedralShellMap_macro_3D.cpp} | 6 +- ...DiffusionIcosahedralShellMap_macro_3D.cpp} | 6 +- ...DiffusionIcosahedralShellMap_macro_3D.cpp} | 6 +- ...apply_P2ElementwiseDiffusion_macro_2D.cpp} | 6 +- ...apply_P2ElementwiseDiffusion_macro_3D.cpp} | 6 +- ...alues_P2ElementwiseDiffusion_macro_2D.cpp} | 6 +- ...alues_P2ElementwiseDiffusion_macro_3D.cpp} | 6 +- ...atrix_P2ElementwiseDiffusion_macro_2D.cpp} | 6 +- ...atrix_P2ElementwiseDiffusion_macro_3D.cpp} | 6 +- operators/div_k_grad/CMakeLists.txt | 96 +- .../div_k_grad/P1ElementwiseDivKGrad.cpp | 18 +- .../div_k_grad/P1ElementwiseDivKGrad.hpp | 211 +- .../div_k_grad/P2ElementwiseDivKGrad.cpp | 18 +- .../div_k_grad/P2ElementwiseDivKGrad.hpp | 243 +- .../P2ElementwiseDivKGradAnnulusMap.cpp | 9 +- .../P2ElementwiseDivKGradAnnulusMap.hpp | 152 +- ...ElementwiseDivKGradIcosahedralShellMap.cpp | 9 +- ...ElementwiseDivKGradIcosahedralShellMap.hpp | 225 +- ..._apply_P1ElementwiseDivKGrad_macro_2D.cpp} | 6 +- ..._apply_P1ElementwiseDivKGrad_macro_3D.cpp} | 6 +- ...Values_P1ElementwiseDivKGrad_macro_2D.cpp} | 6 +- ...Values_P1ElementwiseDivKGrad_macro_3D.cpp} | 6 +- ...lementwiseDivKGradAnnulusMap_macro_2D.cpp} | 6 +- ...lementwiseDivKGradAnnulusMap_macro_2D.cpp} | 6 +- ...eDivKGradIcosahedralShellMap_macro_3D.cpp} | 6 +- ...eDivKGradIcosahedralShellMap_macro_3D.cpp} | 6 +- ..._apply_P2ElementwiseDivKGrad_macro_2D.cpp} | 6 +- ..._apply_P2ElementwiseDivKGrad_macro_3D.cpp} | 6 +- ...Values_P2ElementwiseDivKGrad_macro_2D.cpp} | 6 +- ...Values_P2ElementwiseDivKGrad_macro_3D.cpp} | 6 +- ..._apply_P1ElementwiseDivKGrad_macro_2D.cpp} | 6 +- ..._apply_P1ElementwiseDivKGrad_macro_3D.cpp} | 6 +- ...Values_P1ElementwiseDivKGrad_macro_2D.cpp} | 6 +- ...Values_P1ElementwiseDivKGrad_macro_3D.cpp} | 6 +- ...Matrix_P1ElementwiseDivKGrad_macro_2D.cpp} | 6 +- ...Matrix_P1ElementwiseDivKGrad_macro_3D.cpp} | 6 +- ...lementwiseDivKGradAnnulusMap_macro_2D.cpp} | 6 +- ...lementwiseDivKGradAnnulusMap_macro_2D.cpp} | 6 +- ...lementwiseDivKGradAnnulusMap_macro_2D.cpp} | 6 +- ...eDivKGradIcosahedralShellMap_macro_3D.cpp} | 6 +- ...eDivKGradIcosahedralShellMap_macro_3D.cpp} | 6 +- ...eDivKGradIcosahedralShellMap_macro_3D.cpp} | 6 +- ..._apply_P2ElementwiseDivKGrad_macro_2D.cpp} | 6 +- ..._apply_P2ElementwiseDivKGrad_macro_3D.cpp} | 6 +- ...Values_P2ElementwiseDivKGrad_macro_2D.cpp} | 6 +- ...Values_P2ElementwiseDivKGrad_macro_3D.cpp} | 6 +- ...Matrix_P2ElementwiseDivKGrad_macro_2D.cpp} | 6 +- ...Matrix_P2ElementwiseDivKGrad_macro_3D.cpp} | 6 +- operators/divergence/CMakeLists.txt | 100 +- ...oP1ElementwiseDivergenceAnnulusMap_0_0.cpp | 6 +- ...oP1ElementwiseDivergenceAnnulusMap_0_0.hpp | 93 +- ...oP1ElementwiseDivergenceAnnulusMap_0_1.cpp | 6 +- ...oP1ElementwiseDivergenceAnnulusMap_0_1.hpp | 93 +- ...twiseDivergenceIcosahedralShellMap_0_0.cpp | 6 +- ...twiseDivergenceIcosahedralShellMap_0_0.hpp | 141 +- ...twiseDivergenceIcosahedralShellMap_0_1.cpp | 6 +- ...twiseDivergenceIcosahedralShellMap_0_1.hpp | 141 +- ...twiseDivergenceIcosahedralShellMap_0_2.cpp | 6 +- ...twiseDivergenceIcosahedralShellMap_0_2.hpp | 141 +- .../P2ToP1ElementwiseDivergence_0_0.cpp | 12 +- .../P2ToP1ElementwiseDivergence_0_0.hpp | 145 +- .../P2ToP1ElementwiseDivergence_0_1.cpp | 12 +- .../P2ToP1ElementwiseDivergence_0_1.hpp | 145 +- .../P2ToP1ElementwiseDivergence_0_2.cpp | 6 +- .../P2ToP1ElementwiseDivergence_0_2.hpp | 85 +- ...wiseDivergenceAnnulusMap_0_0_macro_2D.cpp} | 6 +- ...wiseDivergenceAnnulusMap_0_1_macro_2D.cpp} | 6 +- ...genceIcosahedralShellMap_0_0_macro_3D.cpp} | 6 +- ...genceIcosahedralShellMap_0_1_macro_3D.cpp} | 6 +- ...genceIcosahedralShellMap_0_2_macro_3D.cpp} | 6 +- ...oP1ElementwiseDivergence_0_0_macro_2D.cpp} | 6 +- ...oP1ElementwiseDivergence_0_0_macro_3D.cpp} | 6 +- ...oP1ElementwiseDivergence_0_1_macro_2D.cpp} | 6 +- ...oP1ElementwiseDivergence_0_1_macro_3D.cpp} | 6 +- ...oP1ElementwiseDivergence_0_2_macro_3D.cpp} | 6 +- ...wiseDivergenceAnnulusMap_0_0_macro_2D.cpp} | 6 +- ...wiseDivergenceAnnulusMap_0_0_macro_2D.cpp} | 6 +- ...wiseDivergenceAnnulusMap_0_1_macro_2D.cpp} | 6 +- ...wiseDivergenceAnnulusMap_0_1_macro_2D.cpp} | 6 +- ...genceIcosahedralShellMap_0_0_macro_3D.cpp} | 6 +- ...genceIcosahedralShellMap_0_0_macro_3D.cpp} | 6 +- ...genceIcosahedralShellMap_0_1_macro_3D.cpp} | 6 +- ...genceIcosahedralShellMap_0_1_macro_3D.cpp} | 6 +- ...genceIcosahedralShellMap_0_2_macro_3D.cpp} | 6 +- ...genceIcosahedralShellMap_0_2_macro_3D.cpp} | 6 +- ...oP1ElementwiseDivergence_0_0_macro_2D.cpp} | 6 +- ...oP1ElementwiseDivergence_0_0_macro_3D.cpp} | 6 +- ...oP1ElementwiseDivergence_0_0_macro_2D.cpp} | 6 +- ...oP1ElementwiseDivergence_0_0_macro_3D.cpp} | 6 +- ...oP1ElementwiseDivergence_0_1_macro_2D.cpp} | 6 +- ...oP1ElementwiseDivergence_0_1_macro_3D.cpp} | 6 +- ...oP1ElementwiseDivergence_0_1_macro_2D.cpp} | 6 +- ...oP1ElementwiseDivergence_0_1_macro_3D.cpp} | 6 +- ...oP1ElementwiseDivergence_0_2_macro_3D.cpp} | 6 +- ...oP1ElementwiseDivergence_0_2_macro_3D.cpp} | 6 +- operators/epsilon/CMakeLists.txt | 320 +- .../P2ElementwiseEpsilonAnnulusMap_0_0.cpp | 9 +- .../P2ElementwiseEpsilonAnnulusMap_0_0.hpp | 152 +- .../P2ElementwiseEpsilonAnnulusMap_0_1.cpp | 6 +- .../P2ElementwiseEpsilonAnnulusMap_0_1.hpp | 105 +- .../P2ElementwiseEpsilonAnnulusMap_1_0.cpp | 6 +- .../P2ElementwiseEpsilonAnnulusMap_1_0.hpp | 105 +- .../P2ElementwiseEpsilonAnnulusMap_1_1.cpp | 9 +- .../P2ElementwiseEpsilonAnnulusMap_1_1.hpp | 152 +- ...mentwiseEpsilonIcosahedralShellMap_0_0.cpp | 9 +- ...mentwiseEpsilonIcosahedralShellMap_0_0.hpp | 225 +- ...mentwiseEpsilonIcosahedralShellMap_0_1.cpp | 6 +- ...mentwiseEpsilonIcosahedralShellMap_0_1.hpp | 153 +- ...mentwiseEpsilonIcosahedralShellMap_0_2.cpp | 6 +- ...mentwiseEpsilonIcosahedralShellMap_0_2.hpp | 153 +- ...mentwiseEpsilonIcosahedralShellMap_1_0.cpp | 6 +- ...mentwiseEpsilonIcosahedralShellMap_1_0.hpp | 153 +- ...mentwiseEpsilonIcosahedralShellMap_1_1.cpp | 9 +- ...mentwiseEpsilonIcosahedralShellMap_1_1.hpp | 225 +- ...mentwiseEpsilonIcosahedralShellMap_1_2.cpp | 6 +- ...mentwiseEpsilonIcosahedralShellMap_1_2.hpp | 153 +- ...mentwiseEpsilonIcosahedralShellMap_2_0.cpp | 6 +- ...mentwiseEpsilonIcosahedralShellMap_2_0.hpp | 153 +- ...mentwiseEpsilonIcosahedralShellMap_2_1.cpp | 6 +- ...mentwiseEpsilonIcosahedralShellMap_2_1.hpp | 153 +- ...mentwiseEpsilonIcosahedralShellMap_2_2.cpp | 9 +- ...mentwiseEpsilonIcosahedralShellMap_2_2.hpp | 225 +- .../epsilon/P2ElementwiseEpsilon_0_0.cpp | 18 +- .../epsilon/P2ElementwiseEpsilon_0_0.hpp | 243 +- .../epsilon/P2ElementwiseEpsilon_0_1.cpp | 12 +- .../epsilon/P2ElementwiseEpsilon_0_1.hpp | 169 +- .../epsilon/P2ElementwiseEpsilon_0_2.cpp | 6 +- .../epsilon/P2ElementwiseEpsilon_0_2.hpp | 97 +- .../epsilon/P2ElementwiseEpsilon_1_0.cpp | 12 +- .../epsilon/P2ElementwiseEpsilon_1_0.hpp | 169 +- .../epsilon/P2ElementwiseEpsilon_1_1.cpp | 18 +- .../epsilon/P2ElementwiseEpsilon_1_1.hpp | 243 +- .../epsilon/P2ElementwiseEpsilon_1_2.cpp | 6 +- .../epsilon/P2ElementwiseEpsilon_1_2.hpp | 97 +- .../epsilon/P2ElementwiseEpsilon_2_0.cpp | 6 +- .../epsilon/P2ElementwiseEpsilon_2_0.hpp | 97 +- .../epsilon/P2ElementwiseEpsilon_2_1.cpp | 6 +- .../epsilon/P2ElementwiseEpsilon_2_1.hpp | 97 +- .../epsilon/P2ElementwiseEpsilon_2_2.cpp | 9 +- .../epsilon/P2ElementwiseEpsilon_2_2.hpp | 140 +- ...entwiseEpsilonAnnulusMap_0_0_macro_2D.cpp} | 6 +- ...entwiseEpsilonAnnulusMap_0_0_macro_2D.cpp} | 6 +- ...entwiseEpsilonAnnulusMap_0_1_macro_2D.cpp} | 6 +- ...entwiseEpsilonAnnulusMap_1_0_macro_2D.cpp} | 6 +- ...entwiseEpsilonAnnulusMap_1_1_macro_2D.cpp} | 6 +- ...entwiseEpsilonAnnulusMap_1_1_macro_2D.cpp} | 6 +- ...silonIcosahedralShellMap_0_0_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_0_0_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_0_1_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_0_2_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_1_0_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_1_1_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_1_1_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_1_2_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_2_0_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_2_1_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_2_2_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_2_2_macro_3D.cpp} | 6 +- ...ply_P2ElementwiseEpsilon_0_0_macro_2D.cpp} | 6 +- ...ply_P2ElementwiseEpsilon_0_0_macro_3D.cpp} | 6 +- ...ues_P2ElementwiseEpsilon_0_0_macro_2D.cpp} | 6 +- ...ues_P2ElementwiseEpsilon_0_0_macro_3D.cpp} | 6 +- ...ply_P2ElementwiseEpsilon_0_1_macro_2D.cpp} | 6 +- ...ply_P2ElementwiseEpsilon_0_1_macro_3D.cpp} | 6 +- ...ply_P2ElementwiseEpsilon_0_2_macro_3D.cpp} | 6 +- ...ply_P2ElementwiseEpsilon_1_0_macro_2D.cpp} | 6 +- ...ply_P2ElementwiseEpsilon_1_0_macro_3D.cpp} | 6 +- ...ply_P2ElementwiseEpsilon_1_1_macro_2D.cpp} | 6 +- ...ply_P2ElementwiseEpsilon_1_1_macro_3D.cpp} | 6 +- ...ues_P2ElementwiseEpsilon_1_1_macro_2D.cpp} | 6 +- ...ues_P2ElementwiseEpsilon_1_1_macro_3D.cpp} | 6 +- ...ply_P2ElementwiseEpsilon_1_2_macro_3D.cpp} | 6 +- ...ply_P2ElementwiseEpsilon_2_0_macro_3D.cpp} | 6 +- ...ply_P2ElementwiseEpsilon_2_1_macro_3D.cpp} | 6 +- ...ply_P2ElementwiseEpsilon_2_2_macro_3D.cpp} | 6 +- ...ues_P2ElementwiseEpsilon_2_2_macro_3D.cpp} | 6 +- ...entwiseEpsilonAnnulusMap_0_0_macro_2D.cpp} | 6 +- ...entwiseEpsilonAnnulusMap_0_0_macro_2D.cpp} | 6 +- ...entwiseEpsilonAnnulusMap_0_0_macro_2D.cpp} | 6 +- ...entwiseEpsilonAnnulusMap_0_1_macro_2D.cpp} | 6 +- ...entwiseEpsilonAnnulusMap_0_1_macro_2D.cpp} | 6 +- ...entwiseEpsilonAnnulusMap_1_0_macro_2D.cpp} | 6 +- ...entwiseEpsilonAnnulusMap_1_0_macro_2D.cpp} | 6 +- ...entwiseEpsilonAnnulusMap_1_1_macro_2D.cpp} | 6 +- ...entwiseEpsilonAnnulusMap_1_1_macro_2D.cpp} | 6 +- ...entwiseEpsilonAnnulusMap_1_1_macro_2D.cpp} | 6 +- ...silonIcosahedralShellMap_0_0_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_0_0_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_0_0_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_0_1_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_0_1_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_0_2_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_0_2_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_1_0_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_1_0_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_1_1_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_1_1_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_1_1_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_1_2_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_1_2_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_2_0_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_2_0_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_2_1_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_2_1_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_2_2_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_2_2_macro_3D.cpp} | 6 +- ...silonIcosahedralShellMap_2_2_macro_3D.cpp} | 6 +- ...ply_P2ElementwiseEpsilon_0_0_macro_2D.cpp} | 6 +- ...ply_P2ElementwiseEpsilon_0_0_macro_3D.cpp} | 6 +- ...ues_P2ElementwiseEpsilon_0_0_macro_2D.cpp} | 6 +- ...ues_P2ElementwiseEpsilon_0_0_macro_3D.cpp} | 6 +- ...rix_P2ElementwiseEpsilon_0_0_macro_2D.cpp} | 6 +- ...rix_P2ElementwiseEpsilon_0_0_macro_3D.cpp} | 6 +- ...ply_P2ElementwiseEpsilon_0_1_macro_2D.cpp} | 6 +- ...ply_P2ElementwiseEpsilon_0_1_macro_3D.cpp} | 6 +- ...rix_P2ElementwiseEpsilon_0_1_macro_2D.cpp} | 6 +- ...rix_P2ElementwiseEpsilon_0_1_macro_3D.cpp} | 6 +- ...ply_P2ElementwiseEpsilon_0_2_macro_3D.cpp} | 6 +- ...rix_P2ElementwiseEpsilon_0_2_macro_3D.cpp} | 6 +- ...ply_P2ElementwiseEpsilon_1_0_macro_2D.cpp} | 6 +- ...ply_P2ElementwiseEpsilon_1_0_macro_3D.cpp} | 6 +- ...rix_P2ElementwiseEpsilon_1_0_macro_2D.cpp} | 6 +- ...rix_P2ElementwiseEpsilon_1_0_macro_3D.cpp} | 6 +- ...ply_P2ElementwiseEpsilon_1_1_macro_2D.cpp} | 6 +- ...ply_P2ElementwiseEpsilon_1_1_macro_3D.cpp} | 6 +- ...ues_P2ElementwiseEpsilon_1_1_macro_2D.cpp} | 6 +- ...ues_P2ElementwiseEpsilon_1_1_macro_3D.cpp} | 6 +- ...rix_P2ElementwiseEpsilon_1_1_macro_2D.cpp} | 6 +- ...rix_P2ElementwiseEpsilon_1_1_macro_3D.cpp} | 6 +- ...ply_P2ElementwiseEpsilon_1_2_macro_3D.cpp} | 6 +- ...rix_P2ElementwiseEpsilon_1_2_macro_3D.cpp} | 6 +- ...ply_P2ElementwiseEpsilon_2_0_macro_3D.cpp} | 6 +- ...rix_P2ElementwiseEpsilon_2_0_macro_3D.cpp} | 6 +- ...ply_P2ElementwiseEpsilon_2_1_macro_3D.cpp} | 6 +- ...rix_P2ElementwiseEpsilon_2_1_macro_3D.cpp} | 6 +- ...ply_P2ElementwiseEpsilon_2_2_macro_3D.cpp} | 6 +- ...ues_P2ElementwiseEpsilon_2_2_macro_3D.cpp} | 6 +- ...rix_P2ElementwiseEpsilon_2_2_macro_3D.cpp} | 6 +- operators/full_stokes/CMakeLists.txt | 320 +- .../P2ElementwiseFullStokesAnnulusMap_0_0.cpp | 9 +- .../P2ElementwiseFullStokesAnnulusMap_0_0.hpp | 153 +- .../P2ElementwiseFullStokesAnnulusMap_0_1.cpp | 6 +- .../P2ElementwiseFullStokesAnnulusMap_0_1.hpp | 105 +- .../P2ElementwiseFullStokesAnnulusMap_1_0.cpp | 6 +- .../P2ElementwiseFullStokesAnnulusMap_1_0.hpp | 105 +- .../P2ElementwiseFullStokesAnnulusMap_1_1.cpp | 9 +- .../P2ElementwiseFullStokesAnnulusMap_1_1.hpp | 153 +- ...twiseFullStokesIcosahedralShellMap_0_0.cpp | 9 +- ...twiseFullStokesIcosahedralShellMap_0_0.hpp | 225 +- ...twiseFullStokesIcosahedralShellMap_0_1.cpp | 6 +- ...twiseFullStokesIcosahedralShellMap_0_1.hpp | 153 +- ...twiseFullStokesIcosahedralShellMap_0_2.cpp | 6 +- ...twiseFullStokesIcosahedralShellMap_0_2.hpp | 153 +- ...twiseFullStokesIcosahedralShellMap_1_0.cpp | 6 +- ...twiseFullStokesIcosahedralShellMap_1_0.hpp | 153 +- ...twiseFullStokesIcosahedralShellMap_1_1.cpp | 9 +- ...twiseFullStokesIcosahedralShellMap_1_1.hpp | 225 +- ...twiseFullStokesIcosahedralShellMap_1_2.cpp | 6 +- ...twiseFullStokesIcosahedralShellMap_1_2.hpp | 153 +- ...twiseFullStokesIcosahedralShellMap_2_0.cpp | 6 +- ...twiseFullStokesIcosahedralShellMap_2_0.hpp | 153 +- ...twiseFullStokesIcosahedralShellMap_2_1.cpp | 6 +- ...twiseFullStokesIcosahedralShellMap_2_1.hpp | 153 +- ...twiseFullStokesIcosahedralShellMap_2_2.cpp | 9 +- ...twiseFullStokesIcosahedralShellMap_2_2.hpp | 225 +- .../P2ElementwiseFullStokes_0_0.cpp | 18 +- .../P2ElementwiseFullStokes_0_0.hpp | 245 +- .../P2ElementwiseFullStokes_0_1.cpp | 12 +- .../P2ElementwiseFullStokes_0_1.hpp | 169 +- .../P2ElementwiseFullStokes_0_2.cpp | 6 +- .../P2ElementwiseFullStokes_0_2.hpp | 97 +- .../P2ElementwiseFullStokes_1_0.cpp | 12 +- .../P2ElementwiseFullStokes_1_0.hpp | 169 +- .../P2ElementwiseFullStokes_1_1.cpp | 18 +- .../P2ElementwiseFullStokes_1_1.hpp | 245 +- .../P2ElementwiseFullStokes_1_2.cpp | 6 +- .../P2ElementwiseFullStokes_1_2.hpp | 97 +- .../P2ElementwiseFullStokes_2_0.cpp | 6 +- .../P2ElementwiseFullStokes_2_0.hpp | 97 +- .../P2ElementwiseFullStokes_2_1.cpp | 6 +- .../P2ElementwiseFullStokes_2_1.hpp | 97 +- .../P2ElementwiseFullStokes_2_2.cpp | 9 +- .../P2ElementwiseFullStokes_2_2.hpp | 141 +- ...wiseFullStokesAnnulusMap_0_0_macro_2D.cpp} | 6 +- ...wiseFullStokesAnnulusMap_0_0_macro_2D.cpp} | 6 +- ...wiseFullStokesAnnulusMap_0_1_macro_2D.cpp} | 6 +- ...wiseFullStokesAnnulusMap_1_0_macro_2D.cpp} | 6 +- ...wiseFullStokesAnnulusMap_1_1_macro_2D.cpp} | 6 +- ...wiseFullStokesAnnulusMap_1_1_macro_2D.cpp} | 6 +- ...tokesIcosahedralShellMap_0_0_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_0_0_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_0_1_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_0_2_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_1_0_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_1_1_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_1_1_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_1_2_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_2_0_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_2_1_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_2_2_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_2_2_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_0_0_macro_2D.cpp} | 6 +- ..._P2ElementwiseFullStokes_0_0_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_0_0_macro_2D.cpp} | 6 +- ..._P2ElementwiseFullStokes_0_0_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_0_1_macro_2D.cpp} | 6 +- ..._P2ElementwiseFullStokes_0_1_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_0_2_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_1_0_macro_2D.cpp} | 6 +- ..._P2ElementwiseFullStokes_1_0_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_1_1_macro_2D.cpp} | 6 +- ..._P2ElementwiseFullStokes_1_1_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_1_1_macro_2D.cpp} | 6 +- ..._P2ElementwiseFullStokes_1_1_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_1_2_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_2_0_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_2_1_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_2_2_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_2_2_macro_3D.cpp} | 6 +- ...wiseFullStokesAnnulusMap_0_0_macro_2D.cpp} | 6 +- ...wiseFullStokesAnnulusMap_0_0_macro_2D.cpp} | 6 +- ...wiseFullStokesAnnulusMap_0_0_macro_2D.cpp} | 6 +- ...wiseFullStokesAnnulusMap_0_1_macro_2D.cpp} | 6 +- ...wiseFullStokesAnnulusMap_0_1_macro_2D.cpp} | 6 +- ...wiseFullStokesAnnulusMap_1_0_macro_2D.cpp} | 6 +- ...wiseFullStokesAnnulusMap_1_0_macro_2D.cpp} | 6 +- ...wiseFullStokesAnnulusMap_1_1_macro_2D.cpp} | 6 +- ...wiseFullStokesAnnulusMap_1_1_macro_2D.cpp} | 6 +- ...wiseFullStokesAnnulusMap_1_1_macro_2D.cpp} | 6 +- ...tokesIcosahedralShellMap_0_0_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_0_0_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_0_0_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_0_1_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_0_1_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_0_2_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_0_2_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_1_0_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_1_0_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_1_1_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_1_1_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_1_1_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_1_2_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_1_2_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_2_0_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_2_0_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_2_1_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_2_1_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_2_2_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_2_2_macro_3D.cpp} | 6 +- ...tokesIcosahedralShellMap_2_2_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_0_0_macro_2D.cpp} | 6 +- ..._P2ElementwiseFullStokes_0_0_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_0_0_macro_2D.cpp} | 6 +- ..._P2ElementwiseFullStokes_0_0_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_0_0_macro_2D.cpp} | 6 +- ..._P2ElementwiseFullStokes_0_0_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_0_1_macro_2D.cpp} | 6 +- ..._P2ElementwiseFullStokes_0_1_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_0_1_macro_2D.cpp} | 6 +- ..._P2ElementwiseFullStokes_0_1_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_0_2_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_0_2_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_1_0_macro_2D.cpp} | 6 +- ..._P2ElementwiseFullStokes_1_0_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_1_0_macro_2D.cpp} | 6 +- ..._P2ElementwiseFullStokes_1_0_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_1_1_macro_2D.cpp} | 6 +- ..._P2ElementwiseFullStokes_1_1_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_1_1_macro_2D.cpp} | 6 +- ..._P2ElementwiseFullStokes_1_1_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_1_1_macro_2D.cpp} | 6 +- ..._P2ElementwiseFullStokes_1_1_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_1_2_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_1_2_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_2_0_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_2_0_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_2_1_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_2_1_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_2_2_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_2_2_macro_3D.cpp} | 6 +- ..._P2ElementwiseFullStokes_2_2_macro_3D.cpp} | 6 +- .../grad_rho_by_rho_dot_u/CMakeLists.txt | 50 +- ...2VectorToP1ElementwiseGradRhoByRhoDotU.cpp | 12 +- ...2VectorToP1ElementwiseGradRhoByRhoDotU.hpp | 195 +- ...1ElementwiseGradRhoByRhoDotUAnnulusMap.cpp | 6 +- ...1ElementwiseGradRhoByRhoDotUAnnulusMap.hpp | 115 +- ...iseGradRhoByRhoDotUIcosahedralShellMap.cpp | 6 +- ...iseGradRhoByRhoDotUIcosahedralShellMap.hpp | 171 +- ...iseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp | 1071 +++ ...oByRhoDotUIcosahedralShellMap_macro_3D.cpp | 7929 +++++++++++++++++ ...P1ElementwiseGradRhoByRhoDotU_macro_2D.cpp | 874 ++ ...P1ElementwiseGradRhoByRhoDotU_macro_3D.cpp | 6458 ++++++++++++++ ...seGradRhoByRhoDotUAnnulusMap_macro_2D.cpp} | 173 +- ...seGradRhoByRhoDotUAnnulusMap_macro_2D.cpp} | 173 +- ...ByRhoDotUIcosahedralShellMap_macro_3D.cpp} | 1085 ++- ...ByRhoDotUIcosahedralShellMap_macro_3D.cpp} | 1085 ++- ...1ElementwiseGradRhoByRhoDotU_macro_2D.cpp} | 136 +- ...1ElementwiseGradRhoByRhoDotU_macro_3D.cpp} | 810 +- ...1ElementwiseGradRhoByRhoDotU_macro_2D.cpp} | 136 +- ...1ElementwiseGradRhoByRhoDotU_macro_3D.cpp} | 810 +- operators/gradient/CMakeLists.txt | 100 +- ...1ToP2ElementwiseGradientAnnulusMap_0_0.cpp | 6 +- ...1ToP2ElementwiseGradientAnnulusMap_0_0.hpp | 93 +- ...1ToP2ElementwiseGradientAnnulusMap_1_0.cpp | 6 +- ...1ToP2ElementwiseGradientAnnulusMap_1_0.hpp | 93 +- ...entwiseGradientIcosahedralShellMap_0_0.cpp | 6 +- ...entwiseGradientIcosahedralShellMap_0_0.hpp | 141 +- ...entwiseGradientIcosahedralShellMap_1_0.cpp | 6 +- ...entwiseGradientIcosahedralShellMap_1_0.hpp | 141 +- ...entwiseGradientIcosahedralShellMap_2_0.cpp | 6 +- ...entwiseGradientIcosahedralShellMap_2_0.hpp | 141 +- .../P1ToP2ElementwiseGradient_0_0.cpp | 12 +- .../P1ToP2ElementwiseGradient_0_0.hpp | 145 +- .../P1ToP2ElementwiseGradient_1_0.cpp | 12 +- .../P1ToP2ElementwiseGradient_1_0.hpp | 145 +- .../P1ToP2ElementwiseGradient_2_0.cpp | 6 +- .../P1ToP2ElementwiseGradient_2_0.hpp | 85 +- ...ntwiseGradientAnnulusMap_0_0_macro_2D.cpp} | 6 +- ...ntwiseGradientAnnulusMap_1_0_macro_2D.cpp} | 6 +- ...dientIcosahedralShellMap_0_0_macro_3D.cpp} | 6 +- ...dientIcosahedralShellMap_1_0_macro_3D.cpp} | 6 +- ...dientIcosahedralShellMap_2_0_macro_3D.cpp} | 6 +- ...1ToP2ElementwiseGradient_0_0_macro_2D.cpp} | 6 +- ...1ToP2ElementwiseGradient_0_0_macro_3D.cpp} | 6 +- ...1ToP2ElementwiseGradient_1_0_macro_2D.cpp} | 6 +- ...1ToP2ElementwiseGradient_1_0_macro_3D.cpp} | 6 +- ...1ToP2ElementwiseGradient_2_0_macro_3D.cpp} | 6 +- ...ntwiseGradientAnnulusMap_0_0_macro_2D.cpp} | 6 +- ...ntwiseGradientAnnulusMap_0_0_macro_2D.cpp} | 6 +- ...ntwiseGradientAnnulusMap_1_0_macro_2D.cpp} | 6 +- ...ntwiseGradientAnnulusMap_1_0_macro_2D.cpp} | 6 +- ...dientIcosahedralShellMap_0_0_macro_3D.cpp} | 6 +- ...dientIcosahedralShellMap_0_0_macro_3D.cpp} | 6 +- ...dientIcosahedralShellMap_1_0_macro_3D.cpp} | 6 +- ...dientIcosahedralShellMap_1_0_macro_3D.cpp} | 6 +- ...dientIcosahedralShellMap_2_0_macro_3D.cpp} | 6 +- ...dientIcosahedralShellMap_2_0_macro_3D.cpp} | 6 +- ...1ToP2ElementwiseGradient_0_0_macro_2D.cpp} | 6 +- ...1ToP2ElementwiseGradient_0_0_macro_3D.cpp} | 6 +- ...1ToP2ElementwiseGradient_0_0_macro_2D.cpp} | 6 +- ...1ToP2ElementwiseGradient_0_0_macro_3D.cpp} | 6 +- ...1ToP2ElementwiseGradient_1_0_macro_2D.cpp} | 6 +- ...1ToP2ElementwiseGradient_1_0_macro_3D.cpp} | 6 +- ...1ToP2ElementwiseGradient_1_0_macro_2D.cpp} | 6 +- ...1ToP2ElementwiseGradient_1_0_macro_3D.cpp} | 6 +- ...1ToP2ElementwiseGradient_2_0_macro_3D.cpp} | 6 +- ...1ToP2ElementwiseGradient_2_0_macro_3D.cpp} | 6 +- operators/k_mass/CMakeLists.txt | 168 +- operators/k_mass/P1ElementwiseKMass.cpp | 18 +- operators/k_mass/P1ElementwiseKMass.hpp | 211 +- .../k_mass/P1ElementwiseKMassAnnulusMap.cpp | 9 +- .../k_mass/P1ElementwiseKMassAnnulusMap.hpp | 136 +- .../P1ElementwiseKMassIcosahedralShellMap.cpp | 9 +- .../P1ElementwiseKMassIcosahedralShellMap.hpp | 209 +- operators/k_mass/P2ElementwiseKMass.cpp | 18 +- operators/k_mass/P2ElementwiseKMass.hpp | 243 +- .../k_mass/P2ElementwiseKMassAnnulusMap.cpp | 9 +- .../k_mass/P2ElementwiseKMassAnnulusMap.hpp | 152 +- .../P2ElementwiseKMassIcosahedralShellMap.cpp | 9 +- .../P2ElementwiseKMassIcosahedralShellMap.hpp | 225 +- operators/k_mass/P2ToP1ElementwiseKMass.cpp | 12 +- operators/k_mass/P2ToP1ElementwiseKMass.hpp | 161 +- .../P2ToP1ElementwiseKMassAnnulusMap.cpp | 6 +- .../P2ToP1ElementwiseKMassAnnulusMap.hpp | 101 +- ...oP1ElementwiseKMassIcosahedralShellMap.cpp | 6 +- ...oP1ElementwiseKMassIcosahedralShellMap.hpp | 149 +- ...P1ElementwiseKMassAnnulusMap_macro_2D.cpp} | 6 +- ...P1ElementwiseKMassAnnulusMap_macro_2D.cpp} | 6 +- ...wiseKMassIcosahedralShellMap_macro_3D.cpp} | 6 +- ...wiseKMassIcosahedralShellMap_macro_3D.cpp} | 6 +- ...ass_apply_P1ElementwiseKMass_macro_2D.cpp} | 6 +- ...ass_apply_P1ElementwiseKMass_macro_3D.cpp} | 6 +- ...torValues_P1ElementwiseKMass_macro_2D.cpp} | 6 +- ...torValues_P1ElementwiseKMass_macro_3D.cpp} | 6 +- ...P2ElementwiseKMassAnnulusMap_macro_2D.cpp} | 6 +- ...P2ElementwiseKMassAnnulusMap_macro_2D.cpp} | 6 +- ...wiseKMassIcosahedralShellMap_macro_3D.cpp} | 6 +- ...wiseKMassIcosahedralShellMap_macro_3D.cpp} | 6 +- ...ass_apply_P2ElementwiseKMass_macro_2D.cpp} | 6 +- ...ass_apply_P2ElementwiseKMass_macro_3D.cpp} | 6 +- ...torValues_P2ElementwiseKMass_macro_2D.cpp} | 6 +- ...torValues_P2ElementwiseKMass_macro_3D.cpp} | 6 +- ...P1ElementwiseKMassAnnulusMap_macro_2D.cpp} | 6 +- ...wiseKMassIcosahedralShellMap_macro_3D.cpp} | 6 +- ...apply_P2ToP1ElementwiseKMass_macro_2D.cpp} | 6 +- ...apply_P2ToP1ElementwiseKMass_macro_3D.cpp} | 6 +- ...P1ElementwiseKMassAnnulusMap_macro_2D.cpp} | 6 +- ...P1ElementwiseKMassAnnulusMap_macro_2D.cpp} | 6 +- ...P1ElementwiseKMassAnnulusMap_macro_2D.cpp} | 6 +- ...wiseKMassIcosahedralShellMap_macro_3D.cpp} | 6 +- ...wiseKMassIcosahedralShellMap_macro_3D.cpp} | 6 +- ...wiseKMassIcosahedralShellMap_macro_3D.cpp} | 6 +- ...ass_apply_P1ElementwiseKMass_macro_2D.cpp} | 6 +- ...ass_apply_P1ElementwiseKMass_macro_3D.cpp} | 6 +- ...torValues_P1ElementwiseKMass_macro_2D.cpp} | 6 +- ...torValues_P1ElementwiseKMass_macro_3D.cpp} | 6 +- ..._toMatrix_P1ElementwiseKMass_macro_2D.cpp} | 6 +- ..._toMatrix_P1ElementwiseKMass_macro_3D.cpp} | 6 +- ...P2ElementwiseKMassAnnulusMap_macro_2D.cpp} | 6 +- ...P2ElementwiseKMassAnnulusMap_macro_2D.cpp} | 6 +- ...P2ElementwiseKMassAnnulusMap_macro_2D.cpp} | 6 +- ...wiseKMassIcosahedralShellMap_macro_3D.cpp} | 6 +- ...wiseKMassIcosahedralShellMap_macro_3D.cpp} | 6 +- ...wiseKMassIcosahedralShellMap_macro_3D.cpp} | 6 +- ...ass_apply_P2ElementwiseKMass_macro_2D.cpp} | 6 +- ...ass_apply_P2ElementwiseKMass_macro_3D.cpp} | 6 +- ...torValues_P2ElementwiseKMass_macro_2D.cpp} | 6 +- ...torValues_P2ElementwiseKMass_macro_3D.cpp} | 6 +- ..._toMatrix_P2ElementwiseKMass_macro_2D.cpp} | 6 +- ..._toMatrix_P2ElementwiseKMass_macro_3D.cpp} | 6 +- ...P1ElementwiseKMassAnnulusMap_macro_2D.cpp} | 6 +- ...P1ElementwiseKMassAnnulusMap_macro_2D.cpp} | 6 +- ...wiseKMassIcosahedralShellMap_macro_3D.cpp} | 6 +- ...wiseKMassIcosahedralShellMap_macro_3D.cpp} | 6 +- ...apply_P2ToP1ElementwiseKMass_macro_2D.cpp} | 6 +- ...apply_P2ToP1ElementwiseKMass_macro_3D.cpp} | 6 +- ...atrix_P2ToP1ElementwiseKMass_macro_2D.cpp} | 6 +- ...atrix_P2ToP1ElementwiseKMass_macro_3D.cpp} | 6 +- operators/mass/CMakeLists.txt | 96 +- operators/mass/P1ElementwiseMass.cpp | 18 +- operators/mass/P1ElementwiseMass.hpp | 199 +- operators/mass/P2ElementwiseMass.cpp | 18 +- operators/mass/P2ElementwiseMass.hpp | 219 +- .../mass/P2ElementwiseMassAnnulusMap.cpp | 9 +- .../mass/P2ElementwiseMassAnnulusMap.hpp | 140 +- .../P2ElementwiseMassIcosahedralShellMap.cpp | 9 +- .../P2ElementwiseMassIcosahedralShellMap.hpp | 213 +- ...Mass_apply_P1ElementwiseMass_macro_2D.cpp} | 6 +- ...Mass_apply_P1ElementwiseMass_macro_3D.cpp} | 6 +- ...atorValues_P1ElementwiseMass_macro_2D.cpp} | 6 +- ...atorValues_P1ElementwiseMass_macro_3D.cpp} | 6 +- ..._P2ElementwiseMassAnnulusMap_macro_2D.cpp} | 6 +- ..._P2ElementwiseMassAnnulusMap_macro_2D.cpp} | 6 +- ...twiseMassIcosahedralShellMap_macro_3D.cpp} | 6 +- ...twiseMassIcosahedralShellMap_macro_3D.cpp} | 6 +- ...Mass_apply_P2ElementwiseMass_macro_2D.cpp} | 6 +- ...Mass_apply_P2ElementwiseMass_macro_3D.cpp} | 6 +- ...atorValues_P2ElementwiseMass_macro_2D.cpp} | 6 +- ...atorValues_P2ElementwiseMass_macro_3D.cpp} | 6 +- ...Mass_apply_P1ElementwiseMass_macro_2D.cpp} | 6 +- ...Mass_apply_P1ElementwiseMass_macro_3D.cpp} | 6 +- ...atorValues_P1ElementwiseMass_macro_2D.cpp} | 6 +- ...atorValues_P1ElementwiseMass_macro_3D.cpp} | 6 +- ...s_toMatrix_P1ElementwiseMass_macro_2D.cpp} | 6 +- ...s_toMatrix_P1ElementwiseMass_macro_3D.cpp} | 6 +- ..._P2ElementwiseMassAnnulusMap_macro_2D.cpp} | 6 +- ..._P2ElementwiseMassAnnulusMap_macro_2D.cpp} | 6 +- ..._P2ElementwiseMassAnnulusMap_macro_2D.cpp} | 6 +- ...twiseMassIcosahedralShellMap_macro_3D.cpp} | 6 +- ...twiseMassIcosahedralShellMap_macro_3D.cpp} | 6 +- ...twiseMassIcosahedralShellMap_macro_3D.cpp} | 6 +- ...Mass_apply_P2ElementwiseMass_macro_2D.cpp} | 6 +- ...Mass_apply_P2ElementwiseMass_macro_3D.cpp} | 6 +- ...atorValues_P2ElementwiseMass_macro_2D.cpp} | 6 +- ...atorValues_P2ElementwiseMass_macro_3D.cpp} | 6 +- ...s_toMatrix_P2ElementwiseMass_macro_2D.cpp} | 6 +- ...s_toMatrix_P2ElementwiseMass_macro_3D.cpp} | 6 +- operators/shear_heating/CMakeLists.txt | 64 +- .../P2ElementwiseShearHeating.cpp | 18 +- .../P2ElementwiseShearHeating.hpp | 303 +- .../P2ElementwiseShearHeatingAnnulusMap.cpp | 9 +- .../P2ElementwiseShearHeatingAnnulusMap.hpp | 177 +- ...entwiseShearHeatingIcosahedralShellMap.cpp | 9 +- ...entwiseShearHeatingIcosahedralShellMap.hpp | 261 +- ...ntwiseShearHeatingAnnulusMap_macro_2D.cpp} | 6 +- ...ntwiseShearHeatingAnnulusMap_macro_2D.cpp} | 6 +- ...arHeatingIcosahedralShellMap_macro_3D.cpp} | 6 +- ...arHeatingIcosahedralShellMap_macro_3D.cpp} | 6 +- ...ly_P2ElementwiseShearHeating_macro_2D.cpp} | 6 +- ...ly_P2ElementwiseShearHeating_macro_3D.cpp} | 6 +- ...es_P2ElementwiseShearHeating_macro_2D.cpp} | 6 +- ...es_P2ElementwiseShearHeating_macro_3D.cpp} | 6 +- ...ntwiseShearHeatingAnnulusMap_macro_2D.cpp} | 6 +- ...ntwiseShearHeatingAnnulusMap_macro_2D.cpp} | 6 +- ...ntwiseShearHeatingAnnulusMap_macro_2D.cpp} | 6 +- ...arHeatingIcosahedralShellMap_macro_3D.cpp} | 6 +- ...arHeatingIcosahedralShellMap_macro_3D.cpp} | 6 +- ...arHeatingIcosahedralShellMap_macro_3D.cpp} | 6 +- ...ly_P2ElementwiseShearHeating_macro_2D.cpp} | 6 +- ...ly_P2ElementwiseShearHeating_macro_3D.cpp} | 6 +- ...es_P2ElementwiseShearHeating_macro_2D.cpp} | 6 +- ...es_P2ElementwiseShearHeating_macro_3D.cpp} | 6 +- ...ix_P2ElementwiseShearHeating_macro_2D.cpp} | 6 +- ...ix_P2ElementwiseShearHeating_macro_3D.cpp} | 6 +- 625 files changed, 29931 insertions(+), 9806 deletions(-) rename operators/curl_curl/avx/{N1E1ElementwiseCurlCurl_apply_macro_3D.cpp => N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp} (99%) rename operators/curl_curl/avx/{N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp => N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp} (99%) rename operators/curl_curl/noarch/{N1E1ElementwiseCurlCurl_apply_macro_3D.cpp => N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp} (99%) rename operators/curl_curl/noarch/{N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp => N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp} (99%) rename operators/curl_curl/noarch/{N1E1ElementwiseCurlCurl_toMatrix_macro_3D.cpp => N1E1ElementwiseCurlCurl_toMatrix_N1E1ElementwiseCurlCurl_macro_3D.cpp} (99%) rename operators/diffusion/avx/{P1ElementwiseDiffusion_apply_macro_2D.cpp => P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp} (99%) rename operators/diffusion/avx/{P1ElementwiseDiffusion_apply_macro_3D.cpp => P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp} (99%) rename operators/diffusion/avx/{P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp => P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp} (99%) rename operators/diffusion/avx/{P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp => P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp} (99%) rename operators/diffusion/avx/{P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp => P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp} (99%) rename operators/diffusion/avx/{P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp} (99%) rename operators/diffusion/avx/{P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp => P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/diffusion/avx/{P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/diffusion/avx/{P2ElementwiseDiffusion_apply_macro_2D.cpp => P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp} (99%) rename operators/diffusion/avx/{P2ElementwiseDiffusion_apply_macro_3D.cpp => P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp} (99%) rename operators/diffusion/avx/{P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp} (99%) rename operators/diffusion/avx/{P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp} (99%) rename operators/diffusion/noarch/{P1ElementwiseDiffusion_apply_macro_2D.cpp => P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp} (98%) rename operators/diffusion/noarch/{P1ElementwiseDiffusion_apply_macro_3D.cpp => P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp} (99%) rename operators/diffusion/noarch/{P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp => P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp} (98%) rename operators/diffusion/noarch/{P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp => P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp} (99%) rename operators/diffusion/noarch/{P1ElementwiseDiffusion_toMatrix_macro_2D.cpp => P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_2D.cpp} (98%) rename operators/diffusion/noarch/{P1ElementwiseDiffusion_toMatrix_macro_3D.cpp => P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_3D.cpp} (99%) rename operators/diffusion/noarch/{P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp => P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp} (99%) rename operators/diffusion/noarch/{P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp} (99%) rename operators/diffusion/noarch/{P2ElementwiseDiffusionAnnulusMap_toMatrix_macro_2D.cpp => P2ElementwiseDiffusionAnnulusMap_toMatrix_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp} (99%) rename operators/diffusion/noarch/{P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp => P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/diffusion/noarch/{P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/diffusion/noarch/{P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_macro_3D.cpp => P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/diffusion/noarch/{P2ElementwiseDiffusion_apply_macro_2D.cpp => P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp} (99%) rename operators/diffusion/noarch/{P2ElementwiseDiffusion_apply_macro_3D.cpp => P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp} (99%) rename operators/diffusion/noarch/{P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp} (98%) rename operators/diffusion/noarch/{P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp} (99%) rename operators/diffusion/noarch/{P2ElementwiseDiffusion_toMatrix_macro_2D.cpp => P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_2D.cpp} (99%) rename operators/diffusion/noarch/{P2ElementwiseDiffusion_toMatrix_macro_3D.cpp => P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_3D.cpp} (99%) rename operators/div_k_grad/avx/{P1ElementwiseDivKGrad_apply_macro_2D.cpp => P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp} (99%) rename operators/div_k_grad/avx/{P1ElementwiseDivKGrad_apply_macro_3D.cpp => P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp} (99%) rename operators/div_k_grad/avx/{P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp => P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp} (99%) rename operators/div_k_grad/avx/{P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp => P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp} (99%) rename operators/div_k_grad/avx/{P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp => P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp} (99%) rename operators/div_k_grad/avx/{P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp} (99%) rename operators/div_k_grad/avx/{P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp => P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/div_k_grad/avx/{P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/div_k_grad/avx/{P2ElementwiseDivKGrad_apply_macro_2D.cpp => P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp} (99%) rename operators/div_k_grad/avx/{P2ElementwiseDivKGrad_apply_macro_3D.cpp => P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp} (99%) rename operators/div_k_grad/avx/{P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp} (99%) rename operators/div_k_grad/avx/{P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp} (99%) rename operators/div_k_grad/noarch/{P1ElementwiseDivKGrad_apply_macro_2D.cpp => P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp} (97%) rename operators/div_k_grad/noarch/{P1ElementwiseDivKGrad_apply_macro_3D.cpp => P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp} (99%) rename operators/div_k_grad/noarch/{P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp => P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp} (97%) rename operators/div_k_grad/noarch/{P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp => P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp} (99%) rename operators/div_k_grad/noarch/{P1ElementwiseDivKGrad_toMatrix_macro_2D.cpp => P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_2D.cpp} (97%) rename operators/div_k_grad/noarch/{P1ElementwiseDivKGrad_toMatrix_macro_3D.cpp => P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_3D.cpp} (99%) rename operators/div_k_grad/noarch/{P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp => P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp} (98%) rename operators/div_k_grad/noarch/{P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp} (98%) rename operators/div_k_grad/noarch/{P2ElementwiseDivKGradAnnulusMap_toMatrix_macro_2D.cpp => P2ElementwiseDivKGradAnnulusMap_toMatrix_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp} (98%) rename operators/div_k_grad/noarch/{P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp => P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/div_k_grad/noarch/{P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/div_k_grad/noarch/{P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_macro_3D.cpp => P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/div_k_grad/noarch/{P2ElementwiseDivKGrad_apply_macro_2D.cpp => P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp} (98%) rename operators/div_k_grad/noarch/{P2ElementwiseDivKGrad_apply_macro_3D.cpp => P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp} (99%) rename operators/div_k_grad/noarch/{P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp} (98%) rename operators/div_k_grad/noarch/{P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp} (99%) rename operators/div_k_grad/noarch/{P2ElementwiseDivKGrad_toMatrix_macro_2D.cpp => P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_2D.cpp} (98%) rename operators/div_k_grad/noarch/{P2ElementwiseDivKGrad_toMatrix_macro_3D.cpp => P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_3D.cpp} (99%) rename operators/divergence/avx/{P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp => P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp} (99%) rename operators/divergence/avx/{P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp => P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp} (99%) rename operators/divergence/avx/{P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp => P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp} (99%) rename operators/divergence/avx/{P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp => P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp} (99%) rename operators/divergence/avx/{P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp => P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp} (99%) rename operators/divergence/avx/{P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp => P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp} (99%) rename operators/divergence/avx/{P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp => P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp} (99%) rename operators/divergence/avx/{P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp => P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp} (99%) rename operators/divergence/avx/{P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp => P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp} (99%) rename operators/divergence/avx/{P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp => P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp} (99%) rename operators/divergence/noarch/{P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp => P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp} (98%) rename operators/divergence/noarch/{P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_macro_2D.cpp => P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp} (98%) rename operators/divergence/noarch/{P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp => P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp} (98%) rename operators/divergence/noarch/{P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_macro_2D.cpp => P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp} (98%) rename operators/divergence/noarch/{P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp => P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp} (99%) rename operators/divergence/noarch/{P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp => P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp} (99%) rename operators/divergence/noarch/{P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp => P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp} (99%) rename operators/divergence/noarch/{P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp => P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp} (99%) rename operators/divergence/noarch/{P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp => P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp} (99%) rename operators/divergence/noarch/{P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp => P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp} (99%) rename operators/divergence/noarch/{P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp => P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp} (98%) rename operators/divergence/noarch/{P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp => P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp} (99%) rename operators/divergence/noarch/{P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_2D.cpp => P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp} (98%) rename operators/divergence/noarch/{P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_3D.cpp => P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp} (99%) rename operators/divergence/noarch/{P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp => P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp} (98%) rename operators/divergence/noarch/{P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp => P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp} (99%) rename operators/divergence/noarch/{P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_2D.cpp => P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp} (98%) rename operators/divergence/noarch/{P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_3D.cpp => P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp} (99%) rename operators/divergence/noarch/{P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp => P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp} (99%) rename operators/divergence/noarch/{P2ToP1ElementwiseDivergence_0_2_toMatrix_macro_3D.cpp => P2ToP1ElementwiseDivergence_0_2_toMatrix_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp => P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp => P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp => P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp => P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp => P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp => P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp => P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp => P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp => P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp => P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp => P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp => P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp => P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp} (99%) rename operators/epsilon/avx/{P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp} (98%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp} (98%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp} (98%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp} (98%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp} (98%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_macro_2D.cpp => P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp} (98%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp => P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp => P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp => P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_0_0_toMatrix_macro_2D.cpp => P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_2D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_0_0_toMatrix_macro_3D.cpp => P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp => P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp => P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_0_1_toMatrix_macro_2D.cpp => P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_2D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_0_1_toMatrix_macro_3D.cpp => P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp => P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_0_2_toMatrix_macro_3D.cpp => P2ElementwiseEpsilon_0_2_toMatrix_P2ElementwiseEpsilon_0_2_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp => P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp => P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_1_0_toMatrix_macro_2D.cpp => P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_2D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_1_0_toMatrix_macro_3D.cpp => P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp => P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp => P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_1_1_toMatrix_macro_2D.cpp => P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_2D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_1_1_toMatrix_macro_3D.cpp => P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp => P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_1_2_toMatrix_macro_3D.cpp => P2ElementwiseEpsilon_1_2_toMatrix_P2ElementwiseEpsilon_1_2_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp => P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_2_0_toMatrix_macro_3D.cpp => P2ElementwiseEpsilon_2_0_toMatrix_P2ElementwiseEpsilon_2_0_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp => P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_2_1_toMatrix_macro_3D.cpp => P2ElementwiseEpsilon_2_1_toMatrix_P2ElementwiseEpsilon_2_1_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp => P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp} (99%) rename operators/epsilon/noarch/{P2ElementwiseEpsilon_2_2_toMatrix_macro_3D.cpp => P2ElementwiseEpsilon_2_2_toMatrix_P2ElementwiseEpsilon_2_2_macro_3D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp => P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp => P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp => P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp => P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp => P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp => P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp => P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp => P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp => P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp => P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp => P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp => P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp => P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp} (99%) rename operators/full_stokes/avx/{P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp} (98%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp} (98%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_macro_2D.cpp => P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp => P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp => P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp => P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_0_0_toMatrix_macro_2D.cpp => P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_2D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_0_0_toMatrix_macro_3D.cpp => P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp => P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp => P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_0_1_toMatrix_macro_2D.cpp => P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_2D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_0_1_toMatrix_macro_3D.cpp => P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp => P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_0_2_toMatrix_macro_3D.cpp => P2ElementwiseFullStokes_0_2_toMatrix_P2ElementwiseFullStokes_0_2_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp => P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp => P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_1_0_toMatrix_macro_2D.cpp => P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_2D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_1_0_toMatrix_macro_3D.cpp => P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp => P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp => P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_1_1_toMatrix_macro_2D.cpp => P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_2D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_1_1_toMatrix_macro_3D.cpp => P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp => P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_1_2_toMatrix_macro_3D.cpp => P2ElementwiseFullStokes_1_2_toMatrix_P2ElementwiseFullStokes_1_2_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp => P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_2_0_toMatrix_macro_3D.cpp => P2ElementwiseFullStokes_2_0_toMatrix_P2ElementwiseFullStokes_2_0_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp => P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_2_1_toMatrix_macro_3D.cpp => P2ElementwiseFullStokes_2_1_toMatrix_P2ElementwiseFullStokes_2_1_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp => P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp} (99%) rename operators/full_stokes/noarch/{P2ElementwiseFullStokes_2_2_toMatrix_macro_3D.cpp => P2ElementwiseFullStokes_2_2_toMatrix_P2ElementwiseFullStokes_2_2_macro_3D.cpp} (99%) create mode 100644 operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp create mode 100644 operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp create mode 100644 operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp create mode 100644 operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp rename operators/grad_rho_by_rho_dot_u/noarch/{P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_macro_2D.cpp => P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp} (78%) rename operators/grad_rho_by_rho_dot_u/noarch/{P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_macro_2D.cpp => P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp} (80%) rename operators/grad_rho_by_rho_dot_u/noarch/{P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_macro_3D.cpp => P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp} (82%) rename operators/grad_rho_by_rho_dot_u/noarch/{P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_macro_3D.cpp => P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp} (84%) rename operators/grad_rho_by_rho_dot_u/noarch/{P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_2D.cpp => P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp} (74%) rename operators/grad_rho_by_rho_dot_u/noarch/{P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_3D.cpp => P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp} (78%) rename operators/grad_rho_by_rho_dot_u/noarch/{P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_2D.cpp => P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp} (77%) rename operators/grad_rho_by_rho_dot_u/noarch/{P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_3D.cpp => P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp} (81%) rename operators/gradient/avx/{P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp => P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp} (99%) rename operators/gradient/avx/{P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp => P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp} (99%) rename operators/gradient/avx/{P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp => P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp} (99%) rename operators/gradient/avx/{P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp => P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp} (99%) rename operators/gradient/avx/{P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp => P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp} (99%) rename operators/gradient/avx/{P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp => P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp} (99%) rename operators/gradient/avx/{P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp => P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp} (99%) rename operators/gradient/avx/{P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp => P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp} (99%) rename operators/gradient/avx/{P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp => P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp} (99%) rename operators/gradient/avx/{P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp => P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp} (99%) rename operators/gradient/noarch/{P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp => P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp} (98%) rename operators/gradient/noarch/{P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_macro_2D.cpp => P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp} (98%) rename operators/gradient/noarch/{P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp => P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp} (98%) rename operators/gradient/noarch/{P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_macro_2D.cpp => P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp} (98%) rename operators/gradient/noarch/{P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp => P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp} (99%) rename operators/gradient/noarch/{P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp => P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp} (99%) rename operators/gradient/noarch/{P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp => P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp} (99%) rename operators/gradient/noarch/{P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp => P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp} (99%) rename operators/gradient/noarch/{P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp => P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp} (99%) rename operators/gradient/noarch/{P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp => P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp} (99%) rename operators/gradient/noarch/{P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp => P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp} (98%) rename operators/gradient/noarch/{P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp => P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp} (99%) rename operators/gradient/noarch/{P1ToP2ElementwiseGradient_0_0_toMatrix_macro_2D.cpp => P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp} (98%) rename operators/gradient/noarch/{P1ToP2ElementwiseGradient_0_0_toMatrix_macro_3D.cpp => P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp} (99%) rename operators/gradient/noarch/{P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp => P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp} (98%) rename operators/gradient/noarch/{P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp => P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp} (99%) rename operators/gradient/noarch/{P1ToP2ElementwiseGradient_1_0_toMatrix_macro_2D.cpp => P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp} (98%) rename operators/gradient/noarch/{P1ToP2ElementwiseGradient_1_0_toMatrix_macro_3D.cpp => P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp} (99%) rename operators/gradient/noarch/{P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp => P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp} (99%) rename operators/gradient/noarch/{P1ToP2ElementwiseGradient_2_0_toMatrix_macro_3D.cpp => P1ToP2ElementwiseGradient_2_0_toMatrix_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp} (99%) rename operators/k_mass/avx/{P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp => P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp} (99%) rename operators/k_mass/avx/{P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp => P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp} (99%) rename operators/k_mass/avx/{P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp => P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/k_mass/avx/{P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp => P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/k_mass/avx/{P1ElementwiseKMass_apply_macro_2D.cpp => P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp} (99%) rename operators/k_mass/avx/{P1ElementwiseKMass_apply_macro_3D.cpp => P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp} (99%) rename operators/k_mass/avx/{P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp => P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp} (99%) rename operators/k_mass/avx/{P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp => P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp} (99%) rename operators/k_mass/avx/{P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp => P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp} (99%) rename operators/k_mass/avx/{P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp} (99%) rename operators/k_mass/avx/{P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp => P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/k_mass/avx/{P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/k_mass/avx/{P2ElementwiseKMass_apply_macro_2D.cpp => P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp} (99%) rename operators/k_mass/avx/{P2ElementwiseKMass_apply_macro_3D.cpp => P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp} (99%) rename operators/k_mass/avx/{P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp} (99%) rename operators/k_mass/avx/{P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp} (99%) rename operators/k_mass/avx/{P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp => P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp} (99%) rename operators/k_mass/avx/{P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp => P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/k_mass/avx/{P2ToP1ElementwiseKMass_apply_macro_2D.cpp => P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp} (99%) rename operators/k_mass/avx/{P2ToP1ElementwiseKMass_apply_macro_3D.cpp => P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp} (99%) rename operators/k_mass/noarch/{P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp => P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp} (98%) rename operators/k_mass/noarch/{P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp => P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp} (98%) rename operators/k_mass/noarch/{P1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp => P1ElementwiseKMassAnnulusMap_toMatrix_P1ElementwiseKMassAnnulusMap_macro_2D.cpp} (98%) rename operators/k_mass/noarch/{P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp => P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/k_mass/noarch/{P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp => P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/k_mass/noarch/{P1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp => P1ElementwiseKMassIcosahedralShellMap_toMatrix_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/k_mass/noarch/{P1ElementwiseKMass_apply_macro_2D.cpp => P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp} (97%) rename operators/k_mass/noarch/{P1ElementwiseKMass_apply_macro_3D.cpp => P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp} (99%) rename operators/k_mass/noarch/{P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp => P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp} (97%) rename operators/k_mass/noarch/{P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp => P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp} (99%) rename operators/k_mass/noarch/{P1ElementwiseKMass_toMatrix_macro_2D.cpp => P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_2D.cpp} (97%) rename operators/k_mass/noarch/{P1ElementwiseKMass_toMatrix_macro_3D.cpp => P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_3D.cpp} (99%) rename operators/k_mass/noarch/{P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp => P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp} (98%) rename operators/k_mass/noarch/{P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp} (98%) rename operators/k_mass/noarch/{P2ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp => P2ElementwiseKMassAnnulusMap_toMatrix_P2ElementwiseKMassAnnulusMap_macro_2D.cpp} (98%) rename operators/k_mass/noarch/{P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp => P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/k_mass/noarch/{P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/k_mass/noarch/{P2ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp => P2ElementwiseKMassIcosahedralShellMap_toMatrix_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/k_mass/noarch/{P2ElementwiseKMass_apply_macro_2D.cpp => P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp} (98%) rename operators/k_mass/noarch/{P2ElementwiseKMass_apply_macro_3D.cpp => P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp} (99%) rename operators/k_mass/noarch/{P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp} (98%) rename operators/k_mass/noarch/{P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp} (99%) rename operators/k_mass/noarch/{P2ElementwiseKMass_toMatrix_macro_2D.cpp => P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_2D.cpp} (98%) rename operators/k_mass/noarch/{P2ElementwiseKMass_toMatrix_macro_3D.cpp => P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_3D.cpp} (99%) rename operators/k_mass/noarch/{P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp => P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp} (98%) rename operators/k_mass/noarch/{P2ToP1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp => P2ToP1ElementwiseKMassAnnulusMap_toMatrix_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp} (98%) rename operators/k_mass/noarch/{P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp => P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/k_mass/noarch/{P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp => P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/k_mass/noarch/{P2ToP1ElementwiseKMass_apply_macro_2D.cpp => P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp} (98%) rename operators/k_mass/noarch/{P2ToP1ElementwiseKMass_apply_macro_3D.cpp => P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp} (99%) rename operators/k_mass/noarch/{P2ToP1ElementwiseKMass_toMatrix_macro_2D.cpp => P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_2D.cpp} (98%) rename operators/k_mass/noarch/{P2ToP1ElementwiseKMass_toMatrix_macro_3D.cpp => P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_3D.cpp} (99%) rename operators/mass/avx/{P1ElementwiseMass_apply_macro_2D.cpp => P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp} (99%) rename operators/mass/avx/{P1ElementwiseMass_apply_macro_3D.cpp => P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp} (99%) rename operators/mass/avx/{P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp => P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp} (98%) rename operators/mass/avx/{P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp => P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp} (99%) rename operators/mass/avx/{P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp => P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp} (99%) rename operators/mass/avx/{P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp} (99%) rename operators/mass/avx/{P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp => P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/mass/avx/{P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/mass/avx/{P2ElementwiseMass_apply_macro_2D.cpp => P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp} (99%) rename operators/mass/avx/{P2ElementwiseMass_apply_macro_3D.cpp => P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp} (99%) rename operators/mass/avx/{P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp} (99%) rename operators/mass/avx/{P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp} (99%) rename operators/mass/noarch/{P1ElementwiseMass_apply_macro_2D.cpp => P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp} (97%) rename operators/mass/noarch/{P1ElementwiseMass_apply_macro_3D.cpp => P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp} (99%) rename operators/mass/noarch/{P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp => P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp} (97%) rename operators/mass/noarch/{P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp => P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp} (99%) rename operators/mass/noarch/{P1ElementwiseMass_toMatrix_macro_2D.cpp => P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_2D.cpp} (97%) rename operators/mass/noarch/{P1ElementwiseMass_toMatrix_macro_3D.cpp => P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_3D.cpp} (99%) rename operators/mass/noarch/{P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp => P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp} (98%) rename operators/mass/noarch/{P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp} (98%) rename operators/mass/noarch/{P2ElementwiseMassAnnulusMap_toMatrix_macro_2D.cpp => P2ElementwiseMassAnnulusMap_toMatrix_P2ElementwiseMassAnnulusMap_macro_2D.cpp} (98%) rename operators/mass/noarch/{P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp => P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/mass/noarch/{P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/mass/noarch/{P2ElementwiseMassIcosahedralShellMap_toMatrix_macro_3D.cpp => P2ElementwiseMassIcosahedralShellMap_toMatrix_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/mass/noarch/{P2ElementwiseMass_apply_macro_2D.cpp => P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp} (98%) rename operators/mass/noarch/{P2ElementwiseMass_apply_macro_3D.cpp => P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp} (99%) rename operators/mass/noarch/{P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp} (97%) rename operators/mass/noarch/{P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp} (99%) rename operators/mass/noarch/{P2ElementwiseMass_toMatrix_macro_2D.cpp => P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_2D.cpp} (98%) rename operators/mass/noarch/{P2ElementwiseMass_toMatrix_macro_3D.cpp => P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_3D.cpp} (99%) rename operators/shear_heating/avx/{P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp => P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp} (99%) rename operators/shear_heating/avx/{P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp} (99%) rename operators/shear_heating/avx/{P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp => P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/shear_heating/avx/{P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/shear_heating/avx/{P2ElementwiseShearHeating_apply_macro_2D.cpp => P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp} (99%) rename operators/shear_heating/avx/{P2ElementwiseShearHeating_apply_macro_3D.cpp => P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp} (99%) rename operators/shear_heating/avx/{P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp} (99%) rename operators/shear_heating/avx/{P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp} (99%) rename operators/shear_heating/noarch/{P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp => P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp} (98%) rename operators/shear_heating/noarch/{P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp} (98%) rename operators/shear_heating/noarch/{P2ElementwiseShearHeatingAnnulusMap_toMatrix_macro_2D.cpp => P2ElementwiseShearHeatingAnnulusMap_toMatrix_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp} (98%) rename operators/shear_heating/noarch/{P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp => P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/shear_heating/noarch/{P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/shear_heating/noarch/{P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_macro_3D.cpp => P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp} (99%) rename operators/shear_heating/noarch/{P2ElementwiseShearHeating_apply_macro_2D.cpp => P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp} (98%) rename operators/shear_heating/noarch/{P2ElementwiseShearHeating_apply_macro_3D.cpp => P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp} (99%) rename operators/shear_heating/noarch/{P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp => P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp} (97%) rename operators/shear_heating/noarch/{P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp => P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp} (99%) rename operators/shear_heating/noarch/{P2ElementwiseShearHeating_toMatrix_macro_2D.cpp => P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_2D.cpp} (98%) rename operators/shear_heating/noarch/{P2ElementwiseShearHeating_toMatrix_macro_3D.cpp => P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_3D.cpp} (99%) diff --git a/generate/generate.py b/generate/generate.py index e55020df..edb7e720 100644 --- a/generate/generate.py +++ b/generate/generate.py @@ -382,7 +382,6 @@ def generate_operator( operator = operator_generation.operators.HyTeGElementwiseOperator( name, symbolizer, - opts=optimizations, kernel_wrapper_types=kernel_types, type_descriptor=type_descriptor, ) @@ -400,20 +399,20 @@ def generate_operator( blending=blending, # type: ignore[call-arg] # kw-args are not supported by Callable ) - operator.add_integral( + operator.add_volume_integral( name="".join(name.split()), - dim=geometry.dimensions, - geometry=geometry, - integration_domain=operator_generation.operators.MacroIntegrationDomain.VOLUME, + volume_geometry=geometry, quad=quad, blending=blending, form=form, loop_strategy=loop_strategies[spec["loop-strategy"]], + optimizations=optimizations, ) dir_path = os.path.join(args.output, form_str) operator.generate_class_code( dir_path, + class_files=operator_generation.operators.CppClassFiles.HEADER_IMPL_AND_VARIANTS, clang_format_binary=args.clang_format_binary, ) diff --git a/generate/requirements.txt b/generate/requirements.txt index 0afd3ab8..930783ac 100644 --- a/generate/requirements.txt +++ b/generate/requirements.txt @@ -1,5 +1,5 @@ --extra-index-url https://test.pypi.org/simple/ -hog @ git+ssh://git@i10git.cs.fau.de/hyteg/hog@26f110bc235ad20bff58416a4dba4e1730e74c4e +hog @ git+https://i10git.cs.fau.de/hyteg/hog@516f33ba88809c2174d316883f09221ed0e7ce02 tomli >= 1.1.0 ; python_version < "3.11" clang-format diff --git a/operators.toml b/operators.toml index 01bda864..806f89a2 100644 --- a/operators.toml +++ b/operators.toml @@ -367,7 +367,7 @@ dimensions = [2, 3] quadrature = 3 blending = "IdentityMap" loop-strategy = "sawtooth" -optimizations = ["quadloops"] +optimizations = ["moveconstants", "vectorize", "quadloops"] [[grad_rho_by_rho_dot_u]] trial-space = "P2Vector" @@ -377,7 +377,7 @@ dimensions = [2] quadrature = 3 blending = "AnnulusMap" loop-strategy = "sawtooth" -optimizations = ["quadloops"] +optimizations = ["moveconstants", "vectorize", "quadloops"] [[grad_rho_by_rho_dot_u]] trial-space = "P2Vector" @@ -387,4 +387,4 @@ dimensions = [3] quadrature = 3 blending = "IcosahedralShellMap" loop-strategy = "sawtooth" -optimizations = ["quadloops"] +optimizations = ["moveconstants", "vectorize", "quadloops"] diff --git a/operators/curl_curl/CMakeLists.txt b/operators/curl_curl/CMakeLists.txt index 8599561f..56728228 100644 --- a/operators/curl_curl/CMakeLists.txt +++ b/operators/curl_curl/CMakeLists.txt @@ -7,15 +7,15 @@ add_library( opgen-curl_curl if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY) target_sources(opgen-curl_curl PRIVATE - avx/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp - avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/N1E1ElementwiseCurlCurl_toMatrix_macro_3D.cpp + avx/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp + avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp + noarch/N1E1ElementwiseCurlCurl_toMatrix_N1E1ElementwiseCurlCurl_macro_3D.cpp ) set_source_files_properties( - avx/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp - avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp + avx/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp + avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS} ) @@ -26,9 +26,9 @@ else() target_sources(opgen-curl_curl PRIVATE - noarch/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp - noarch/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/N1E1ElementwiseCurlCurl_toMatrix_macro_3D.cpp + noarch/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp + noarch/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp + noarch/N1E1ElementwiseCurlCurl_toMatrix_N1E1ElementwiseCurlCurl_macro_3D.cpp ) endif() diff --git a/operators/curl_curl/N1E1ElementwiseCurlCurl.cpp b/operators/curl_curl/N1E1ElementwiseCurlCurl.cpp index 84d74b53..92232dfb 100644 --- a/operators/curl_curl/N1E1ElementwiseCurlCurl.cpp +++ b/operators/curl_curl/N1E1ElementwiseCurlCurl.cpp @@ -118,7 +118,7 @@ void N1E1ElementwiseCurlCurl::apply( const n1e1::N1E1VectorFunction< real_t >& s this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_N1E1ElementwiseCurlCurl_macro_3D( _data_dst, _data_src, @@ -136,6 +136,7 @@ void N1E1ElementwiseCurlCurl::apply( const n1e1::N1E1VectorFunction< real_t >& s macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -200,7 +201,7 @@ void N1E1ElementwiseCurlCurl::toMatrix( const std::shared_ptr< SparseMatrixProxy this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_N1E1ElementwiseCurlCurl_macro_3D( _data_dst, _data_src, @@ -221,6 +222,7 @@ void N1E1ElementwiseCurlCurl::toMatrix( const std::shared_ptr< SparseMatrixProxy mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -278,7 +280,7 @@ void N1E1ElementwiseCurlCurl::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D( _data_invDiag_, macro_vertex_coord_id_0comp0, @@ -295,6 +297,7 @@ void N1E1ElementwiseCurlCurl::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/curl_curl/N1E1ElementwiseCurlCurl.hpp b/operators/curl_curl/N1E1ElementwiseCurlCurl.hpp index b66be43c..5560334f 100644 --- a/operators/curl_curl/N1E1ElementwiseCurlCurl.hpp +++ b/operators/curl_curl/N1E1ElementwiseCurlCurl.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/n1e1functionspace/N1E1MacroCell.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -68,74 +70,88 @@ class N1E1ElementwiseCurlCurl : public Operator< n1e1::N1E1VectorFunction< real_ protected: private: - /// Kernel type: apply + /// Integral: N1E1ElementwiseCurlCurl + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Keast 0 | points: 1, degree: 1 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 180 202 37 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_N1E1ElementwiseCurlCurl_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: N1E1ElementwiseCurlCurl + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Keast 0 | points: 1, degree: 1 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 144 253 37 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_src, - const Cell& cell, - const uint_t level, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_N1E1ElementwiseCurlCurl_macro_3D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_src, + const Cell& cell, + const uint_t level, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: N1E1ElementwiseCurlCurl + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Keast 0 | points: 1, degree: 1 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 120 115 37 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D( real_t* RESTRICT _data_invDiag_, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< n1e1::N1E1VectorFunction< real_t > > invDiag_; }; diff --git a/operators/curl_curl/avx/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp b/operators/curl_curl/avx/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp similarity index 99% rename from operators/curl_curl/avx/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp rename to operators/curl_curl/avx/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp index 25560670..16022bda 100644 --- a/operators/curl_curl/avx/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp +++ b/operators/curl_curl/avx/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void N1E1ElementwiseCurlCurl::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void N1E1ElementwiseCurlCurl::apply_N1E1ElementwiseCurlCurl_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/curl_curl/avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/curl_curl/avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp similarity index 99% rename from operators/curl_curl/avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/curl_curl/avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp index 4397e237..9e45450f 100644 --- a/operators/curl_curl/avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/curl_curl/avx/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void N1E1ElementwiseCurlCurl::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void N1E1ElementwiseCurlCurl::computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp b/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp similarity index 99% rename from operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp rename to operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp index b2ddf686..7a290c00 100644 --- a/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_apply_macro_3D.cpp +++ b/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_apply_N1E1ElementwiseCurlCurl_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void N1E1ElementwiseCurlCurl::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void N1E1ElementwiseCurlCurl::apply_N1E1ElementwiseCurlCurl_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp similarity index 99% rename from operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp index f8a8c7e2..7279d4a6 100644 --- a/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void N1E1ElementwiseCurlCurl::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void N1E1ElementwiseCurlCurl::computeInverseDiagonalOperatorValues_N1E1ElementwiseCurlCurl_macro_3D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_toMatrix_macro_3D.cpp b/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_toMatrix_N1E1ElementwiseCurlCurl_macro_3D.cpp similarity index 99% rename from operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_toMatrix_macro_3D.cpp rename to operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_toMatrix_N1E1ElementwiseCurlCurl_macro_3D.cpp index a46f9611..cf06b5a5 100644 --- a/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_toMatrix_macro_3D.cpp +++ b/operators/curl_curl/noarch/N1E1ElementwiseCurlCurl_toMatrix_N1E1ElementwiseCurlCurl_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void N1E1ElementwiseCurlCurl::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_src, const Cell& cell, const uint_t level, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void N1E1ElementwiseCurlCurl::toMatrix_N1E1ElementwiseCurlCurl_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_src, const Cell& cell, const uint_t level, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/CMakeLists.txt b/operators/diffusion/CMakeLists.txt index 89d33aa9..694448a6 100644 --- a/operators/diffusion/CMakeLists.txt +++ b/operators/diffusion/CMakeLists.txt @@ -13,40 +13,40 @@ add_library( opgen-diffusion if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY) target_sources(opgen-diffusion PRIVATE - avx/P1ElementwiseDiffusion_apply_macro_2D.cpp - avx/P1ElementwiseDiffusion_apply_macro_3D.cpp - avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp - avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp - avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseDiffusion_apply_macro_2D.cpp - avx/P2ElementwiseDiffusion_apply_macro_3D.cpp - avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P1ElementwiseDiffusion_toMatrix_macro_2D.cpp - noarch/P1ElementwiseDiffusion_toMatrix_macro_3D.cpp - noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_macro_2D.cpp - noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P2ElementwiseDiffusion_toMatrix_macro_2D.cpp - noarch/P2ElementwiseDiffusion_toMatrix_macro_3D.cpp + avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp + avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp + avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp + avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp + avx/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp + avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp + avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp + avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp + avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp + avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp + noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_2D.cpp + noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_3D.cpp + noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_2D.cpp + noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_3D.cpp ) set_source_files_properties( - avx/P1ElementwiseDiffusion_apply_macro_2D.cpp - avx/P1ElementwiseDiffusion_apply_macro_3D.cpp - avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp - avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp - avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseDiffusion_apply_macro_2D.cpp - avx/P2ElementwiseDiffusion_apply_macro_3D.cpp - avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp + avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp + avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp + avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp + avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp + avx/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp + avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp + avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp + avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp + avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp + avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS} ) @@ -57,24 +57,24 @@ else() target_sources(opgen-diffusion PRIVATE - noarch/P1ElementwiseDiffusion_apply_macro_2D.cpp - noarch/P1ElementwiseDiffusion_apply_macro_3D.cpp - noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P1ElementwiseDiffusion_toMatrix_macro_2D.cpp - noarch/P1ElementwiseDiffusion_toMatrix_macro_3D.cpp - noarch/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp - noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_macro_2D.cpp - noarch/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp - noarch/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P2ElementwiseDiffusion_apply_macro_2D.cpp - noarch/P2ElementwiseDiffusion_apply_macro_3D.cpp - noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseDiffusion_toMatrix_macro_2D.cpp - noarch/P2ElementwiseDiffusion_toMatrix_macro_3D.cpp + noarch/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp + noarch/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp + noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp + noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp + noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_2D.cpp + noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_3D.cpp + noarch/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp + noarch/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp + noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp + noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp + noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_2D.cpp + noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_3D.cpp ) endif() diff --git a/operators/diffusion/P1ElementwiseDiffusion.cpp b/operators/diffusion/P1ElementwiseDiffusion.cpp index f61cda10..48131e46 100644 --- a/operators/diffusion/P1ElementwiseDiffusion.cpp +++ b/operators/diffusion/P1ElementwiseDiffusion.cpp @@ -126,7 +126,7 @@ void P1ElementwiseDiffusion::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P1ElementwiseDiffusion_macro_3D( _data_dst, _data_src, @@ -144,6 +144,7 @@ void P1ElementwiseDiffusion::apply( const P1Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -191,7 +192,7 @@ void P1ElementwiseDiffusion::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P1ElementwiseDiffusion_macro_2D( _data_dst, _data_src, @@ -203,6 +204,7 @@ void P1ElementwiseDiffusion::apply( const P1Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -263,7 +265,7 @@ void P1ElementwiseDiffusion::toMatrix( const std::shared_ptr< SparseMatrixProxy this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P1ElementwiseDiffusion_macro_3D( _data_dst, _data_src, @@ -282,6 +284,7 @@ void P1ElementwiseDiffusion::toMatrix( const std::shared_ptr< SparseMatrixProxy mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -310,7 +313,7 @@ void P1ElementwiseDiffusion::toMatrix( const std::shared_ptr< SparseMatrixProxy this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P1ElementwiseDiffusion_macro_2D( _data_dst, _data_src, @@ -323,6 +326,7 @@ void P1ElementwiseDiffusion::toMatrix( const std::shared_ptr< SparseMatrixProxy mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -371,7 +375,7 @@ void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D( _data_invDiag_, macro_vertex_coord_id_0comp0, @@ -388,6 +392,7 @@ void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -426,7 +431,7 @@ void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D( _data_invDiag_, macro_vertex_coord_id_0comp0, @@ -437,6 +442,7 @@ void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/diffusion/P1ElementwiseDiffusion.hpp b/operators/diffusion/P1ElementwiseDiffusion.hpp index 3829a003..af6e18e0 100644 --- a/operators/diffusion/P1ElementwiseDiffusion.hpp +++ b/operators/diffusion/P1ElementwiseDiffusion.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -82,120 +84,149 @@ class P1ElementwiseDiffusion : public Operator< P1Function< real_t >, P1Function protected: private: - /// Kernel type: apply + /// Integral: P1ElementwiseDiffusion + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Centroid rule | points: 1, degree: 1 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 49 49 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P1ElementwiseDiffusion_macro_2D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseDiffusion + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Keast 0 | points: 1, degree: 1 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 135 123 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P1ElementwiseDiffusion_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseDiffusion + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Centroid rule | points: 1, degree: 1 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 40 43 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P1ElementwiseDiffusion_macro_2D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseDiffusion + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Keast 0 | points: 1, degree: 1 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 119 113 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P1ElementwiseDiffusion_macro_3D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseDiffusion + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: CUBES /// - quadrature rule: Centroid rule | points: 1, degree: 1 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 40 34 12 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D( real_t* RESTRICT _data_invDiag_, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseDiffusion + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: CUBES /// - quadrature rule: Keast 0 | points: 1, degree: 1 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 111 89 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D( real_t* RESTRICT _data_invDiag_, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P1Function< real_t > > invDiag_; }; diff --git a/operators/diffusion/P2ElementwiseDiffusion.cpp b/operators/diffusion/P2ElementwiseDiffusion.cpp index 67280970..1a42c563 100644 --- a/operators/diffusion/P2ElementwiseDiffusion.cpp +++ b/operators/diffusion/P2ElementwiseDiffusion.cpp @@ -129,7 +129,7 @@ void P2ElementwiseDiffusion::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseDiffusion_macro_3D( _data_dstEdge, _data_dstVertex, @@ -149,6 +149,7 @@ void P2ElementwiseDiffusion::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -216,7 +217,7 @@ void P2ElementwiseDiffusion::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseDiffusion_macro_2D( _data_dstEdge, _data_dstVertex, @@ -230,6 +231,7 @@ void P2ElementwiseDiffusion::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -296,7 +298,7 @@ void P2ElementwiseDiffusion::toMatrix( const std::shared_ptr< SparseMatrixProxy this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseDiffusion_macro_3D( _data_dstEdge, _data_dstVertex, @@ -317,6 +319,7 @@ void P2ElementwiseDiffusion::toMatrix( const std::shared_ptr< SparseMatrixProxy mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -347,7 +350,7 @@ void P2ElementwiseDiffusion::toMatrix( const std::shared_ptr< SparseMatrixProxy this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseDiffusion_macro_2D( _data_dstEdge, _data_dstVertex, @@ -362,6 +365,7 @@ void P2ElementwiseDiffusion::toMatrix( const std::shared_ptr< SparseMatrixProxy mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -412,7 +416,7 @@ void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -430,6 +434,7 @@ void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -472,7 +477,7 @@ void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -484,6 +489,7 @@ void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/diffusion/P2ElementwiseDiffusion.hpp b/operators/diffusion/P2ElementwiseDiffusion.hpp index 6eca940a..919440af 100644 --- a/operators/diffusion/P2ElementwiseDiffusion.hpp +++ b/operators/diffusion/P2ElementwiseDiffusion.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -82,130 +84,159 @@ class P2ElementwiseDiffusion : public Operator< P2Function< real_t >, P2Function protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseDiffusion + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 215 310 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ElementwiseDiffusion_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseDiffusion + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1086 1461 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseDiffusion_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseDiffusion + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 179 274 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ElementwiseDiffusion_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseDiffusion + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 986 1361 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseDiffusion_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseDiffusion + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: CUBES /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 110 127 12 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseDiffusion + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 381 497 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P2Function< real_t > > invDiag_; }; diff --git a/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.cpp b/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.cpp index 5733867e..8dd27ac9 100644 --- a/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.cpp +++ b/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.cpp @@ -144,7 +144,7 @@ void P2ElementwiseDiffusionAnnulusMap::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseDiffusionAnnulusMap_macro_2D( _data_dstEdge, _data_dstVertex, @@ -166,6 +166,7 @@ void P2ElementwiseDiffusionAnnulusMap::apply( const P2Function< real_t >& src, refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -245,7 +246,7 @@ void P2ElementwiseDiffusionAnnulusMap::toMatrix( const std::shared_ptr< SparseMa this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseDiffusionAnnulusMap_macro_2D( _data_dstEdge, _data_dstVertex, @@ -268,6 +269,7 @@ void P2ElementwiseDiffusionAnnulusMap::toMatrix( const std::shared_ptr< SparseMa refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } @@ -332,7 +334,7 @@ void P2ElementwiseDiffusionAnnulusMap::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -352,6 +354,7 @@ void P2ElementwiseDiffusionAnnulusMap::computeInverseDiagonalOperatorValues() refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.hpp b/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.hpp index 5a5fa0bf..f08fa6df 100644 --- a/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.hpp +++ b/operators/diffusion/P2ElementwiseDiffusionAnnulusMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,83 +85,97 @@ class P2ElementwiseDiffusionAnnulusMap : public Operator< P2Function< real_t >, protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseDiffusionAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 385 607 17 8 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseDiffusionAnnulusMap_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseDiffusionAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 349 571 17 8 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseDiffusionAnnulusMap_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseDiffusionAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 250 391 17 8 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; }; diff --git a/operators/diffusion/P2ElementwiseDiffusionIcosahedralShellMap.cpp b/operators/diffusion/P2ElementwiseDiffusionIcosahedralShellMap.cpp index 4f8d29bc..f639eaae 100644 --- a/operators/diffusion/P2ElementwiseDiffusionIcosahedralShellMap.cpp +++ b/operators/diffusion/P2ElementwiseDiffusionIcosahedralShellMap.cpp @@ -147,7 +147,7 @@ void P2ElementwiseDiffusionIcosahedralShellMap::apply( const P2Function< real_t this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( _data_dstEdge, _data_dstVertex, @@ -181,6 +181,7 @@ void P2ElementwiseDiffusionIcosahedralShellMap::apply( const P2Function< real_t thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -272,7 +273,7 @@ void P2ElementwiseDiffusionIcosahedralShellMap::toMatrix( const std::shared_ptr< this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( _data_dstEdge, _data_dstVertex, @@ -307,6 +308,7 @@ void P2ElementwiseDiffusionIcosahedralShellMap::toMatrix( const std::shared_ptr< thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } @@ -382,7 +384,7 @@ void P2ElementwiseDiffusionIcosahedralShellMap::computeInverseDiagonalOperatorVa this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -414,6 +416,7 @@ void P2ElementwiseDiffusionIcosahedralShellMap::computeInverseDiagonalOperatorVa thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/diffusion/P2ElementwiseDiffusionIcosahedralShellMap.hpp b/operators/diffusion/P2ElementwiseDiffusionIcosahedralShellMap.hpp index e5ef874b..70628126 100644 --- a/operators/diffusion/P2ElementwiseDiffusionIcosahedralShellMap.hpp +++ b/operators/diffusion/P2ElementwiseDiffusionIcosahedralShellMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -85,119 +87,134 @@ class P2ElementwiseDiffusionIcosahedralShellMap : public Operator< P2Function< r protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseDiffusionIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1729 2398 42 5 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseDiffusionIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1629 2298 42 5 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseDiffusionIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1009 1398 42 5 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; }; diff --git a/operators/diffusion/avx/P1ElementwiseDiffusion_apply_macro_2D.cpp b/operators/diffusion/avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp similarity index 99% rename from operators/diffusion/avx/P1ElementwiseDiffusion_apply_macro_2D.cpp rename to operators/diffusion/avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp index d9b0c4db..60b87ab9 100644 --- a/operators/diffusion/avx/P1ElementwiseDiffusion_apply_macro_2D.cpp +++ b/operators/diffusion/avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDiffusion::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDiffusion::apply_P1ElementwiseDiffusion_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/avx/P1ElementwiseDiffusion_apply_macro_3D.cpp b/operators/diffusion/avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp similarity index 99% rename from operators/diffusion/avx/P1ElementwiseDiffusion_apply_macro_3D.cpp rename to operators/diffusion/avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp index f6023630..b41a0d92 100644 --- a/operators/diffusion/avx/P1ElementwiseDiffusion_apply_macro_3D.cpp +++ b/operators/diffusion/avx/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDiffusion::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDiffusion::apply_P1ElementwiseDiffusion_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp similarity index 99% rename from operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp index 0f3d04a1..7f00b2b8 100644 --- a/operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp similarity index 99% rename from operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp index afaf36e1..9d11044c 100644 --- a/operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/diffusion/avx/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp b/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp rename to operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp index 3cc11289..ac0e0104 100644 --- a/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp +++ b/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusionAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseDiffusionAnnulusMap::apply_P2ElementwiseDiffusionAnnulusMap_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp index 2b6fdcb8..bdcda6ad 100644 --- a/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/diffusion/avx/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusionAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseDiffusionAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp b/operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp rename to operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp index 371da2f5..79e9770d 100644 --- a/operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusionIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseDiffusionIcosahedralShellMap::apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp index 334bc530..911470e4 100644 --- a/operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/diffusion/avx/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusionIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseDiffusionIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/avx/P2ElementwiseDiffusion_apply_macro_2D.cpp b/operators/diffusion/avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp similarity index 99% rename from operators/diffusion/avx/P2ElementwiseDiffusion_apply_macro_2D.cpp rename to operators/diffusion/avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp index 35800f77..631a97d6 100644 --- a/operators/diffusion/avx/P2ElementwiseDiffusion_apply_macro_2D.cpp +++ b/operators/diffusion/avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusion::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDiffusion::apply_P2ElementwiseDiffusion_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/avx/P2ElementwiseDiffusion_apply_macro_3D.cpp b/operators/diffusion/avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp similarity index 99% rename from operators/diffusion/avx/P2ElementwiseDiffusion_apply_macro_3D.cpp rename to operators/diffusion/avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp index 5420835a..50b747de 100644 --- a/operators/diffusion/avx/P2ElementwiseDiffusion_apply_macro_3D.cpp +++ b/operators/diffusion/avx/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusion::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDiffusion::apply_P2ElementwiseDiffusion_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp similarity index 99% rename from operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp index 96c7ff88..6438fa3d 100644 --- a/operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp similarity index 99% rename from operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp index 114ae31f..0b778cc7 100644 --- a/operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/diffusion/avx/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P1ElementwiseDiffusion_apply_macro_2D.cpp b/operators/diffusion/noarch/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp similarity index 98% rename from operators/diffusion/noarch/P1ElementwiseDiffusion_apply_macro_2D.cpp rename to operators/diffusion/noarch/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp index ae6d97a5..e6389f85 100644 --- a/operators/diffusion/noarch/P1ElementwiseDiffusion_apply_macro_2D.cpp +++ b/operators/diffusion/noarch/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDiffusion::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDiffusion::apply_P1ElementwiseDiffusion_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P1ElementwiseDiffusion_apply_macro_3D.cpp b/operators/diffusion/noarch/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp similarity index 99% rename from operators/diffusion/noarch/P1ElementwiseDiffusion_apply_macro_3D.cpp rename to operators/diffusion/noarch/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp index ff9d3ee6..90d1f0f2 100644 --- a/operators/diffusion/noarch/P1ElementwiseDiffusion_apply_macro_3D.cpp +++ b/operators/diffusion/noarch/P1ElementwiseDiffusion_apply_P1ElementwiseDiffusion_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDiffusion::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDiffusion::apply_P1ElementwiseDiffusion_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp similarity index 98% rename from operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp index 84296219..8d5dc9bb 100644 --- a/operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_2D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp similarity index 99% rename from operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp index b6973f15..c4d8f511 100644 --- a/operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/diffusion/noarch/P1ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDiffusion::computeInverseDiagonalOperatorValues_P1ElementwiseDiffusion_macro_3D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_macro_2D.cpp b/operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_2D.cpp similarity index 98% rename from operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_macro_2D.cpp rename to operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_2D.cpp index a8c60c59..3260de76 100644 --- a/operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_macro_2D.cpp +++ b/operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDiffusion::toMatrix_macro_2D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDiffusion::toMatrix_P1ElementwiseDiffusion_macro_2D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_macro_3D.cpp b/operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_3D.cpp similarity index 99% rename from operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_macro_3D.cpp rename to operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_3D.cpp index 5190dd49..b4b506a9 100644 --- a/operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_macro_3D.cpp +++ b/operators/diffusion/noarch/P1ElementwiseDiffusion_toMatrix_P1ElementwiseDiffusion_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDiffusion::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDiffusion::toMatrix_P1ElementwiseDiffusion_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp rename to operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp index 365494c8..7896b446 100644 --- a/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_apply_macro_2D.cpp +++ b/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_apply_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusionAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseDiffusionAnnulusMap::apply_P2ElementwiseDiffusionAnnulusMap_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp index e1d257fc..59c2a883 100644 --- a/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusionAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseDiffusionAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionAnnulusMap_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_macro_2D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_macro_2D.cpp rename to operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp index 82d4f6a6..982ff306 100644 --- a/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_macro_2D.cpp +++ b/operators/diffusion/noarch/P2ElementwiseDiffusionAnnulusMap_toMatrix_P2ElementwiseDiffusionAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusionAnnulusMap::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseDiffusionAnnulusMap::toMatrix_P2ElementwiseDiffusionAnnulusMap_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp rename to operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp index 4645c9f6..6860a492 100644 --- a/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusionIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseDiffusionIcosahedralShellMap::apply_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp index abd0fc64..d532a935 100644 --- a/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusionIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseDiffusionIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_macro_3D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_macro_3D.cpp rename to operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp index 2c546d5d..89f5f78a 100644 --- a/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_macro_3D.cpp +++ b/operators/diffusion/noarch/P2ElementwiseDiffusionIcosahedralShellMap_toMatrix_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusionIcosahedralShellMap::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseDiffusionIcosahedralShellMap::toMatrix_P2ElementwiseDiffusionIcosahedralShellMap_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusion_apply_macro_2D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp similarity index 99% rename from operators/diffusion/noarch/P2ElementwiseDiffusion_apply_macro_2D.cpp rename to operators/diffusion/noarch/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp index 27134c90..b2ae9b4c 100644 --- a/operators/diffusion/noarch/P2ElementwiseDiffusion_apply_macro_2D.cpp +++ b/operators/diffusion/noarch/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusion::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDiffusion::apply_P2ElementwiseDiffusion_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusion_apply_macro_3D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp similarity index 99% rename from operators/diffusion/noarch/P2ElementwiseDiffusion_apply_macro_3D.cpp rename to operators/diffusion/noarch/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp index 2d62014f..ea6ed321 100644 --- a/operators/diffusion/noarch/P2ElementwiseDiffusion_apply_macro_3D.cpp +++ b/operators/diffusion/noarch/P2ElementwiseDiffusion_apply_P2ElementwiseDiffusion_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusion::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDiffusion::apply_P2ElementwiseDiffusion_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp similarity index 98% rename from operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp index 5ee134f2..00858d29 100644 --- a/operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp similarity index 99% rename from operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp index 9b0bd97e..195e8de9 100644 --- a/operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/diffusion/noarch/P2ElementwiseDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDiffusion::computeInverseDiagonalOperatorValues_P2ElementwiseDiffusion_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_macro_2D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_2D.cpp similarity index 99% rename from operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_macro_2D.cpp rename to operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_2D.cpp index 23e440f5..81ca2f5b 100644 --- a/operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_macro_2D.cpp +++ b/operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusion::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDiffusion::toMatrix_P2ElementwiseDiffusion_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_macro_3D.cpp b/operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_3D.cpp similarity index 99% rename from operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_macro_3D.cpp rename to operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_3D.cpp index 0a27a28b..a687924a 100644 --- a/operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_macro_3D.cpp +++ b/operators/diffusion/noarch/P2ElementwiseDiffusion_toMatrix_P2ElementwiseDiffusion_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDiffusion::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDiffusion::toMatrix_P2ElementwiseDiffusion_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/div_k_grad/CMakeLists.txt b/operators/div_k_grad/CMakeLists.txt index bd44475b..a917f326 100644 --- a/operators/div_k_grad/CMakeLists.txt +++ b/operators/div_k_grad/CMakeLists.txt @@ -13,40 +13,40 @@ add_library( opgen-div_k_grad if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY) target_sources(opgen-div_k_grad PRIVATE - avx/P1ElementwiseDivKGrad_apply_macro_2D.cpp - avx/P1ElementwiseDivKGrad_apply_macro_3D.cpp - avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp - avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp - avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseDivKGrad_apply_macro_2D.cpp - avx/P2ElementwiseDivKGrad_apply_macro_3D.cpp - avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P1ElementwiseDivKGrad_toMatrix_macro_2D.cpp - noarch/P1ElementwiseDivKGrad_toMatrix_macro_3D.cpp - noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_macro_2D.cpp - noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P2ElementwiseDivKGrad_toMatrix_macro_2D.cpp - noarch/P2ElementwiseDivKGrad_toMatrix_macro_3D.cpp + avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp + avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp + avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp + avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp + avx/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp + avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp + avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp + avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp + avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp + avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp + noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_2D.cpp + noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_3D.cpp + noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_2D.cpp + noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_3D.cpp ) set_source_files_properties( - avx/P1ElementwiseDivKGrad_apply_macro_2D.cpp - avx/P1ElementwiseDivKGrad_apply_macro_3D.cpp - avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp - avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp - avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseDivKGrad_apply_macro_2D.cpp - avx/P2ElementwiseDivKGrad_apply_macro_3D.cpp - avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp + avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp + avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp + avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp + avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp + avx/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp + avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp + avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp + avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp + avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp + avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS} ) @@ -57,24 +57,24 @@ else() target_sources(opgen-div_k_grad PRIVATE - noarch/P1ElementwiseDivKGrad_apply_macro_2D.cpp - noarch/P1ElementwiseDivKGrad_apply_macro_3D.cpp - noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P1ElementwiseDivKGrad_toMatrix_macro_2D.cpp - noarch/P1ElementwiseDivKGrad_toMatrix_macro_3D.cpp - noarch/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp - noarch/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_macro_2D.cpp - noarch/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp - noarch/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P2ElementwiseDivKGrad_apply_macro_2D.cpp - noarch/P2ElementwiseDivKGrad_apply_macro_3D.cpp - noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseDivKGrad_toMatrix_macro_2D.cpp - noarch/P2ElementwiseDivKGrad_toMatrix_macro_3D.cpp + noarch/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp + noarch/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp + noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp + noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp + noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_2D.cpp + noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_3D.cpp + noarch/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp + noarch/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp + noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp + noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp + noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_2D.cpp + noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_3D.cpp ) endif() diff --git a/operators/div_k_grad/P1ElementwiseDivKGrad.cpp b/operators/div_k_grad/P1ElementwiseDivKGrad.cpp index ee98f134..a5dd1f51 100644 --- a/operators/div_k_grad/P1ElementwiseDivKGrad.cpp +++ b/operators/div_k_grad/P1ElementwiseDivKGrad.cpp @@ -133,7 +133,7 @@ void P1ElementwiseDivKGrad::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P1ElementwiseDivKGrad_macro_3D( _data_dst, _data_k, @@ -152,6 +152,7 @@ void P1ElementwiseDivKGrad::apply( const P1Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -200,7 +201,7 @@ void P1ElementwiseDivKGrad::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P1ElementwiseDivKGrad_macro_2D( _data_dst, _data_k, @@ -213,6 +214,7 @@ void P1ElementwiseDivKGrad::apply( const P1Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -276,7 +278,7 @@ void P1ElementwiseDivKGrad::toMatrix( const std::shared_ptr< SparseMatrixProxy > this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P1ElementwiseDivKGrad_macro_3D( _data_dst, _data_k, @@ -296,6 +298,7 @@ void P1ElementwiseDivKGrad::toMatrix( const std::shared_ptr< SparseMatrixProxy > mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -325,7 +328,7 @@ void P1ElementwiseDivKGrad::toMatrix( const std::shared_ptr< SparseMatrixProxy > this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P1ElementwiseDivKGrad_macro_2D( _data_dst, _data_k, @@ -339,6 +342,7 @@ void P1ElementwiseDivKGrad::toMatrix( const std::shared_ptr< SparseMatrixProxy > mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -390,7 +394,7 @@ void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D( _data_invDiag_, _data_k, @@ -408,6 +412,7 @@ void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -447,7 +452,7 @@ void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D( _data_invDiag_, _data_k, @@ -459,6 +464,7 @@ void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/div_k_grad/P1ElementwiseDivKGrad.hpp b/operators/div_k_grad/P1ElementwiseDivKGrad.hpp index f1c5b366..3862da35 100644 --- a/operators/div_k_grad/P1ElementwiseDivKGrad.hpp +++ b/operators/div_k_grad/P1ElementwiseDivKGrad.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -82,126 +84,155 @@ class P1ElementwiseDivKGrad : public Operator< P1Function< real_t >, P1Function< protected: private: - /// Kernel type: apply + /// Integral: P1ElementwiseDivKGrad + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 95 102 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_k, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P1ElementwiseDivKGrad_macro_2D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_k, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseDivKGrad + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 277 272 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_k, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P1ElementwiseDivKGrad_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_k, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseDivKGrad + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 86 93 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_k, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P1ElementwiseDivKGrad_macro_2D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_k, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseDivKGrad + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 261 256 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_k, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P1ElementwiseDivKGrad_macro_3D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_k, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseDivKGrad + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 71 66 12 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_, - real_t* RESTRICT _data_k, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D( real_t* RESTRICT _data_invDiag_, + real_t* RESTRICT _data_k, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseDivKGrad + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 193 160 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_, - real_t* RESTRICT _data_k, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D( real_t* RESTRICT _data_invDiag_, + real_t* RESTRICT _data_k, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P1Function< real_t > > invDiag_; P1Function< real_t > k; diff --git a/operators/div_k_grad/P2ElementwiseDivKGrad.cpp b/operators/div_k_grad/P2ElementwiseDivKGrad.cpp index 76309616..9474445b 100644 --- a/operators/div_k_grad/P2ElementwiseDivKGrad.cpp +++ b/operators/div_k_grad/P2ElementwiseDivKGrad.cpp @@ -137,7 +137,7 @@ void P2ElementwiseDivKGrad::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseDivKGrad_macro_3D( _data_dstEdge, _data_dstVertex, @@ -159,6 +159,7 @@ void P2ElementwiseDivKGrad::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -228,7 +229,7 @@ void P2ElementwiseDivKGrad::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseDivKGrad_macro_2D( _data_dstEdge, _data_dstVertex, @@ -244,6 +245,7 @@ void P2ElementwiseDivKGrad::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -314,7 +316,7 @@ void P2ElementwiseDivKGrad::toMatrix( const std::shared_ptr< SparseMatrixProxy > this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseDivKGrad_macro_3D( _data_dstEdge, _data_dstVertex, @@ -337,6 +339,7 @@ void P2ElementwiseDivKGrad::toMatrix( const std::shared_ptr< SparseMatrixProxy > mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -369,7 +372,7 @@ void P2ElementwiseDivKGrad::toMatrix( const std::shared_ptr< SparseMatrixProxy > this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseDivKGrad_macro_2D( _data_dstEdge, _data_dstVertex, @@ -386,6 +389,7 @@ void P2ElementwiseDivKGrad::toMatrix( const std::shared_ptr< SparseMatrixProxy > mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -440,7 +444,7 @@ void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -460,6 +464,7 @@ void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -504,7 +509,7 @@ void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -518,6 +523,7 @@ void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/div_k_grad/P2ElementwiseDivKGrad.hpp b/operators/div_k_grad/P2ElementwiseDivKGrad.hpp index 8029975a..4dc8becf 100644 --- a/operators/div_k_grad/P2ElementwiseDivKGrad.hpp +++ b/operators/div_k_grad/P2ElementwiseDivKGrad.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -82,142 +84,171 @@ class P2ElementwiseDivKGrad : public Operator< P2Function< real_t >, P2Function< protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseDivKGrad + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 290 378 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ElementwiseDivKGrad_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseDivKGrad + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1273 1640 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseDivKGrad_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseDivKGrad + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 254 342 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ElementwiseDivKGrad_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseDivKGrad + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1173 1540 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseDivKGrad_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseDivKGrad + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 170 195 12 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseDivKGrad + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 523 676 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > k; diff --git a/operators/div_k_grad/P2ElementwiseDivKGradAnnulusMap.cpp b/operators/div_k_grad/P2ElementwiseDivKGradAnnulusMap.cpp index 65d43fda..6804e03d 100644 --- a/operators/div_k_grad/P2ElementwiseDivKGradAnnulusMap.cpp +++ b/operators/div_k_grad/P2ElementwiseDivKGradAnnulusMap.cpp @@ -149,7 +149,7 @@ void P2ElementwiseDivKGradAnnulusMap::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseDivKGradAnnulusMap_macro_2D( _data_dstEdge, _data_dstVertex, @@ -173,6 +173,7 @@ void P2ElementwiseDivKGradAnnulusMap::apply( const P2Function< real_t >& src, refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -256,7 +257,7 @@ void P2ElementwiseDivKGradAnnulusMap::toMatrix( const std::shared_ptr< SparseMat this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseDivKGradAnnulusMap_macro_2D( _data_dstEdge, _data_dstVertex, @@ -281,6 +282,7 @@ void P2ElementwiseDivKGradAnnulusMap::toMatrix( const std::shared_ptr< SparseMat refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } @@ -349,7 +351,7 @@ void P2ElementwiseDivKGradAnnulusMap::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -371,6 +373,7 @@ void P2ElementwiseDivKGradAnnulusMap::computeInverseDiagonalOperatorValues() refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/div_k_grad/P2ElementwiseDivKGradAnnulusMap.hpp b/operators/div_k_grad/P2ElementwiseDivKGradAnnulusMap.hpp index fed80068..511b0056 100644 --- a/operators/div_k_grad/P2ElementwiseDivKGradAnnulusMap.hpp +++ b/operators/div_k_grad/P2ElementwiseDivKGradAnnulusMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,89 +85,103 @@ class P2ElementwiseDivKGradAnnulusMap : public Operator< P2Function< real_t >, P protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseDivKGradAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 668 1044 28 20 4 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseDivKGradAnnulusMap_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseDivKGradAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 632 1008 28 20 4 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseDivKGradAnnulusMap_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseDivKGradAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 518 828 28 20 4 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > k; diff --git a/operators/div_k_grad/P2ElementwiseDivKGradIcosahedralShellMap.cpp b/operators/div_k_grad/P2ElementwiseDivKGradIcosahedralShellMap.cpp index 07cd64ae..aae2d630 100644 --- a/operators/div_k_grad/P2ElementwiseDivKGradIcosahedralShellMap.cpp +++ b/operators/div_k_grad/P2ElementwiseDivKGradIcosahedralShellMap.cpp @@ -154,7 +154,7 @@ void P2ElementwiseDivKGradIcosahedralShellMap::apply( const P2Function< real_t > this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseDivKGradIcosahedralShellMap::apply( const P2Function< real_t > thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseDivKGradIcosahedralShellMap::toMatrix( const std::shared_ptr< this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseDivKGradIcosahedralShellMap::toMatrix( const std::shared_ptr< thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } @@ -401,7 +403,7 @@ void P2ElementwiseDivKGradIcosahedralShellMap::computeInverseDiagonalOperatorVal this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -435,6 +437,7 @@ void P2ElementwiseDivKGradIcosahedralShellMap::computeInverseDiagonalOperatorVal thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/div_k_grad/P2ElementwiseDivKGradIcosahedralShellMap.hpp b/operators/div_k_grad/P2ElementwiseDivKGradIcosahedralShellMap.hpp index ae3ca25c..81e94998 100644 --- a/operators/div_k_grad/P2ElementwiseDivKGradIcosahedralShellMap.hpp +++ b/operators/div_k_grad/P2ElementwiseDivKGradIcosahedralShellMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,125 +85,140 @@ class P2ElementwiseDivKGradIcosahedralShellMap : public Operator< P2Function< re protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseDivKGradIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2453 3892 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseDivKGradIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2353 3792 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseDivKGradIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1688 2892 66 15 5 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > k; diff --git a/operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_macro_2D.cpp b/operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp similarity index 99% rename from operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_macro_2D.cpp rename to operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp index e9ee5836..742b6a5d 100644 --- a/operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_macro_2D.cpp +++ b/operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDivKGrad::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDivKGrad::apply_P1ElementwiseDivKGrad_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_macro_3D.cpp b/operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_macro_3D.cpp rename to operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp index 62884c29..07516062 100644 --- a/operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_macro_3D.cpp +++ b/operators/div_k_grad/avx/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDivKGrad::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDivKGrad::apply_P1ElementwiseDivKGrad_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp similarity index 99% rename from operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp index 6a72549f..7e48f505 100644 --- a/operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp index 32844194..a5a8fe88 100644 --- a/operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/div_k_grad/avx/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp b/operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp rename to operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp index 9f880a45..9b41394a 100644 --- a/operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp +++ b/operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGradAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseDivKGradAnnulusMap::apply_P2ElementwiseDivKGradAnnulusMap_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp index 7fb1e61d..c3180dc5 100644 --- a/operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/div_k_grad/avx/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGradAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseDivKGradAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp b/operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp rename to operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp index 0a49d7ba..621b9144 100644 --- a/operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGradIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseDivKGradIcosahedralShellMap::apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp index 27a84e72..9e074b2a 100644 --- a/operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/div_k_grad/avx/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGradIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseDivKGradIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_macro_2D.cpp b/operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp similarity index 99% rename from operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_macro_2D.cpp rename to operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp index 3d15cd08..ae8283c9 100644 --- a/operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_macro_2D.cpp +++ b/operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGrad::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDivKGrad::apply_P2ElementwiseDivKGrad_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_macro_3D.cpp b/operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_macro_3D.cpp rename to operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp index e0b46bc2..c3b7e54a 100644 --- a/operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_macro_3D.cpp +++ b/operators/div_k_grad/avx/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGrad::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDivKGrad::apply_P2ElementwiseDivKGrad_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp similarity index 99% rename from operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp index 0b753df5..241e57c5 100644 --- a/operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp index 3b9c2989..55e3fa2a 100644 --- a/operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/div_k_grad/avx/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_macro_2D.cpp b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp similarity index 97% rename from operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_macro_2D.cpp rename to operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp index a20e98e0..08fdf639 100644 --- a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_macro_2D.cpp +++ b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDivKGrad::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDivKGrad::apply_P1ElementwiseDivKGrad_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_macro_3D.cpp b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_macro_3D.cpp rename to operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp index 248a21e3..ebf6a755 100644 --- a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_macro_3D.cpp +++ b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_apply_P1ElementwiseDivKGrad_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDivKGrad::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDivKGrad::apply_P1ElementwiseDivKGrad_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp similarity index 97% rename from operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp index 37f59b2a..b1aea623 100644 --- a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_2D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp index 262ee517..76b3867c 100644 --- a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_P1ElementwiseDivKGrad_macro_3D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_macro_2D.cpp b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_2D.cpp similarity index 97% rename from operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_macro_2D.cpp rename to operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_2D.cpp index 93e49e99..0258b90a 100644 --- a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_macro_2D.cpp +++ b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDivKGrad::toMatrix_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDivKGrad::toMatrix_P1ElementwiseDivKGrad_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_macro_3D.cpp b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_macro_3D.cpp rename to operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_3D.cpp index 32fca0a6..b09d8354 100644 --- a/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_macro_3D.cpp +++ b/operators/div_k_grad/noarch/P1ElementwiseDivKGrad_toMatrix_P1ElementwiseDivKGrad_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseDivKGrad::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseDivKGrad::toMatrix_P1ElementwiseDivKGrad_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp rename to operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp index 2474e6cc..42622258 100644 --- a/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_apply_macro_2D.cpp +++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_apply_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGradAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseDivKGradAnnulusMap::apply_P2ElementwiseDivKGradAnnulusMap_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp index b4fc63cc..756d25bd 100644 --- a/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGradAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseDivKGradAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradAnnulusMap_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_macro_2D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_macro_2D.cpp rename to operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp index eb9f7a41..68c6f015 100644 --- a/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_macro_2D.cpp +++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGradAnnulusMap_toMatrix_P2ElementwiseDivKGradAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGradAnnulusMap::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseDivKGradAnnulusMap::toMatrix_P2ElementwiseDivKGradAnnulusMap_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp rename to operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp index 3e6876d2..7ef705be 100644 --- a/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGradIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseDivKGradIcosahedralShellMap::apply_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp index 2ac58fd8..1e24196d 100644 --- a/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGradIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseDivKGradIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_macro_3D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_macro_3D.cpp rename to operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp index 8dc72e26..04fced8a 100644 --- a/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_macro_3D.cpp +++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGradIcosahedralShellMap_toMatrix_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGradIcosahedralShellMap::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseDivKGradIcosahedralShellMap::toMatrix_P2ElementwiseDivKGradIcosahedralShellMap_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_macro_2D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp similarity index 98% rename from operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_macro_2D.cpp rename to operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp index 135d9eff..17c7e3af 100644 --- a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_macro_2D.cpp +++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGrad::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDivKGrad::apply_P2ElementwiseDivKGrad_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_macro_3D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_macro_3D.cpp rename to operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp index b04e3118..3b8272b2 100644 --- a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_macro_3D.cpp +++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_apply_P2ElementwiseDivKGrad_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGrad::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDivKGrad::apply_P2ElementwiseDivKGrad_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp similarity index 98% rename from operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp index dace6244..f2767976 100644 --- a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp index d4ee433c..f2ccc2b2 100644 --- a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDivKGrad::computeInverseDiagonalOperatorValues_P2ElementwiseDivKGrad_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_macro_2D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_2D.cpp similarity index 98% rename from operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_macro_2D.cpp rename to operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_2D.cpp index d19cb169..16cf14e5 100644 --- a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_macro_2D.cpp +++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGrad::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDivKGrad::toMatrix_P2ElementwiseDivKGrad_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_macro_3D.cpp b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_3D.cpp similarity index 99% rename from operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_macro_3D.cpp rename to operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_3D.cpp index 280d8106..a9ef3882 100644 --- a/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_macro_3D.cpp +++ b/operators/div_k_grad/noarch/P2ElementwiseDivKGrad_toMatrix_P2ElementwiseDivKGrad_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseDivKGrad::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseDivKGrad::toMatrix_P2ElementwiseDivKGrad_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/divergence/CMakeLists.txt b/operators/divergence/CMakeLists.txt index a3729803..dec538e7 100644 --- a/operators/divergence/CMakeLists.txt +++ b/operators/divergence/CMakeLists.txt @@ -21,40 +21,40 @@ add_library( opgen-divergence if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY) target_sources(opgen-divergence PRIVATE - avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp - avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp - avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp - avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp - avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp - avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp - avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp - avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp - avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp - avx/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_macro_2D.cpp - noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_macro_2D.cpp - noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_2D.cpp - noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_2D.cpp - noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_macro_3D.cpp + avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp + avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp + avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp + avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp + avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp + avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp + avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp + avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp + avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp + avx/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp + noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp + noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp + noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp + noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp ) set_source_files_properties( - avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp - avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp - avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp - avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp - avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp - avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp - avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp - avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp - avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp - avx/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp + avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp + avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp + avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp + avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp + avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp + avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp + avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp + avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp + avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp + avx/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS} ) @@ -65,26 +65,26 @@ else() target_sources(opgen-divergence PRIVATE - noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp - noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_macro_2D.cpp - noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp - noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_macro_2D.cpp - noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp - noarch/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_2D.cpp - noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp - noarch/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_2D.cpp - noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp - noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp + noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp + noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp + noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp + noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp + noarch/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp + noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp + noarch/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp + noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp + noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp ) endif() diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.cpp b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.cpp index a3480614..4279d509 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.cpp +++ b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.cpp @@ -133,7 +133,7 @@ void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::apply( const P2Function< real_t this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D( _data_dst, _data_srcEdge, @@ -154,6 +154,7 @@ void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::apply( const P2Function< real_t refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -228,7 +229,7 @@ void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::toMatrix( const std::shared_ptr< this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D( _data_dst, _data_srcEdge, @@ -250,6 +251,7 @@ void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::toMatrix( const std::shared_ptr< refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.hpp b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.hpp index 8480f840..99cb2ed6 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.hpp +++ b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -77,57 +79,66 @@ class P2ToP1ElementwiseDivergenceAnnulusMap_0_0 : public Operator< P2Function< r protected: private: - /// Kernel type: apply + /// Integral: P2ToP1ElementwiseDivergenceAnnulusMap_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 220 318 17 12 3 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ToP1ElementwiseDivergenceAnnulusMap_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 202 300 17 12 3 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; }; } // namespace operatorgeneration diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.cpp b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.cpp index 682f2f25..f63530f1 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.cpp +++ b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.cpp @@ -133,7 +133,7 @@ void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::apply( const P2Function< real_t this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D( _data_dst, _data_srcEdge, @@ -154,6 +154,7 @@ void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::apply( const P2Function< real_t refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -228,7 +229,7 @@ void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::toMatrix( const std::shared_ptr< this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D( _data_dst, _data_srcEdge, @@ -250,6 +251,7 @@ void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::toMatrix( const std::shared_ptr< refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.hpp b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.hpp index da1d51b4..d9659d20 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.hpp +++ b/operators/divergence/P2ToP1ElementwiseDivergenceAnnulusMap_0_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -77,57 +79,66 @@ class P2ToP1ElementwiseDivergenceAnnulusMap_0_1 : public Operator< P2Function< r protected: private: - /// Kernel type: apply + /// Integral: P2ToP1ElementwiseDivergenceAnnulusMap_0_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 220 318 17 12 3 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ToP1ElementwiseDivergenceAnnulusMap_0_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 202 300 17 12 3 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; }; } // namespace operatorgeneration diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0.cpp b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0.cpp index 9b612ab4..6597cfad 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0.cpp +++ b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0.cpp @@ -145,7 +145,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::apply( const P2Function this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D( _data_dst, _data_srcEdge, @@ -178,6 +178,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::apply( const P2Function thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -261,7 +262,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::toMatrix( const std::sh this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D( _data_dst, _data_srcEdge, @@ -295,6 +296,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::toMatrix( const std::sh thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0.hpp b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0.hpp index 271f6e6e..58026a0d 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0.hpp +++ b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -77,81 +79,90 @@ class P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0 : public Operator< P2Fu protected: private: - /// Kernel type: apply + /// Integral: P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 540 755 46 4 4 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 500 715 46 4 4 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; }; } // namespace operatorgeneration diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1.cpp b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1.cpp index 9759b2b7..b7a33b38 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1.cpp +++ b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1.cpp @@ -145,7 +145,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::apply( const P2Function this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D( _data_dst, _data_srcEdge, @@ -178,6 +178,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::apply( const P2Function thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -261,7 +262,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::toMatrix( const std::sh this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D( _data_dst, _data_srcEdge, @@ -295,6 +296,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::toMatrix( const std::sh thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1.hpp b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1.hpp index c4c588ca..b04b0891 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1.hpp +++ b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -77,81 +79,90 @@ class P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1 : public Operator< P2Fu protected: private: - /// Kernel type: apply + /// Integral: P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 540 755 46 4 4 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 500 715 46 4 4 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; }; } // namespace operatorgeneration diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2.cpp b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2.cpp index 7099d22e..f92a94c7 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2.cpp +++ b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2.cpp @@ -145,7 +145,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::apply( const P2Function this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D( _data_dst, _data_srcEdge, @@ -178,6 +178,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::apply( const P2Function thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -261,7 +262,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::toMatrix( const std::sh this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D( _data_dst, _data_srcEdge, @@ -295,6 +296,7 @@ void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::toMatrix( const std::sh thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2.hpp b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2.hpp index abb88eb5..d0df6fe8 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2.hpp +++ b/operators/divergence/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -77,81 +79,90 @@ class P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2 : public Operator< P2Fu protected: private: - /// Kernel type: apply + /// Integral: P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 540 755 46 4 4 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 500 715 46 4 4 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; }; } // namespace operatorgeneration diff --git a/operators/divergence/P2ToP1ElementwiseDivergence_0_0.cpp b/operators/divergence/P2ToP1ElementwiseDivergence_0_0.cpp index 4b0f4130..e03a5a1a 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergence_0_0.cpp +++ b/operators/divergence/P2ToP1ElementwiseDivergence_0_0.cpp @@ -127,7 +127,7 @@ void P2ToP1ElementwiseDivergence_0_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ToP1ElementwiseDivergence_0_0_macro_3D( _data_dst, _data_srcEdge, @@ -146,6 +146,7 @@ void P2ToP1ElementwiseDivergence_0_0::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -194,7 +195,7 @@ void P2ToP1ElementwiseDivergence_0_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ToP1ElementwiseDivergence_0_0_macro_2D( _data_dst, _data_srcEdge, @@ -207,6 +208,7 @@ void P2ToP1ElementwiseDivergence_0_0::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -268,7 +270,7 @@ void P2ToP1ElementwiseDivergence_0_0::toMatrix( const std::shared_ptr< SparseMat this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_3D( _data_dst, _data_srcEdge, @@ -288,6 +290,7 @@ void P2ToP1ElementwiseDivergence_0_0::toMatrix( const std::shared_ptr< SparseMat mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -317,7 +320,7 @@ void P2ToP1ElementwiseDivergence_0_0::toMatrix( const std::shared_ptr< SparseMat this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_2D( _data_dst, _data_srcEdge, @@ -331,6 +334,7 @@ void P2ToP1ElementwiseDivergence_0_0::toMatrix( const std::shared_ptr< SparseMat mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/divergence/P2ToP1ElementwiseDivergence_0_0.hpp b/operators/divergence/P2ToP1ElementwiseDivergence_0_0.hpp index 653ee1b0..8657dc17 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergence_0_0.hpp +++ b/operators/divergence/P2ToP1ElementwiseDivergence_0_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -74,88 +76,107 @@ class P2ToP1ElementwiseDivergence_0_0 : public Operator< P2Function< real_t >, P protected: private: - /// Kernel type: apply + /// Integral: P2ToP1ElementwiseDivergence_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 116 132 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ToP1ElementwiseDivergence_0_0_macro_2D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ToP1ElementwiseDivergence_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 345 352 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ToP1ElementwiseDivergence_0_0_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ToP1ElementwiseDivergence_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 98 114 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_2D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ToP1ElementwiseDivergence_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 305 312 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_3D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; }; } // namespace operatorgeneration diff --git a/operators/divergence/P2ToP1ElementwiseDivergence_0_1.cpp b/operators/divergence/P2ToP1ElementwiseDivergence_0_1.cpp index e0bc14a7..25f0678d 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergence_0_1.cpp +++ b/operators/divergence/P2ToP1ElementwiseDivergence_0_1.cpp @@ -127,7 +127,7 @@ void P2ToP1ElementwiseDivergence_0_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ToP1ElementwiseDivergence_0_1_macro_3D( _data_dst, _data_srcEdge, @@ -146,6 +146,7 @@ void P2ToP1ElementwiseDivergence_0_1::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -194,7 +195,7 @@ void P2ToP1ElementwiseDivergence_0_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ToP1ElementwiseDivergence_0_1_macro_2D( _data_dst, _data_srcEdge, @@ -207,6 +208,7 @@ void P2ToP1ElementwiseDivergence_0_1::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -268,7 +270,7 @@ void P2ToP1ElementwiseDivergence_0_1::toMatrix( const std::shared_ptr< SparseMat this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_3D( _data_dst, _data_srcEdge, @@ -288,6 +290,7 @@ void P2ToP1ElementwiseDivergence_0_1::toMatrix( const std::shared_ptr< SparseMat mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -317,7 +320,7 @@ void P2ToP1ElementwiseDivergence_0_1::toMatrix( const std::shared_ptr< SparseMat this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_2D( _data_dst, _data_srcEdge, @@ -331,6 +334,7 @@ void P2ToP1ElementwiseDivergence_0_1::toMatrix( const std::shared_ptr< SparseMat mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/divergence/P2ToP1ElementwiseDivergence_0_1.hpp b/operators/divergence/P2ToP1ElementwiseDivergence_0_1.hpp index 58716343..389d7638 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergence_0_1.hpp +++ b/operators/divergence/P2ToP1ElementwiseDivergence_0_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -74,88 +76,107 @@ class P2ToP1ElementwiseDivergence_0_1 : public Operator< P2Function< real_t >, P protected: private: - /// Kernel type: apply + /// Integral: P2ToP1ElementwiseDivergence_0_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 116 132 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ToP1ElementwiseDivergence_0_1_macro_2D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ToP1ElementwiseDivergence_0_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 345 352 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ToP1ElementwiseDivergence_0_1_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ToP1ElementwiseDivergence_0_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 98 114 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_2D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ToP1ElementwiseDivergence_0_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 305 312 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_3D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; }; } // namespace operatorgeneration diff --git a/operators/divergence/P2ToP1ElementwiseDivergence_0_2.cpp b/operators/divergence/P2ToP1ElementwiseDivergence_0_2.cpp index 920dcae6..5a88c930 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergence_0_2.cpp +++ b/operators/divergence/P2ToP1ElementwiseDivergence_0_2.cpp @@ -127,7 +127,7 @@ void P2ToP1ElementwiseDivergence_0_2::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ToP1ElementwiseDivergence_0_2_macro_3D( _data_dst, _data_srcEdge, @@ -146,6 +146,7 @@ void P2ToP1ElementwiseDivergence_0_2::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -212,7 +213,7 @@ void P2ToP1ElementwiseDivergence_0_2::toMatrix( const std::shared_ptr< SparseMat this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ToP1ElementwiseDivergence_0_2_macro_3D( _data_dst, _data_srcEdge, @@ -232,6 +233,7 @@ void P2ToP1ElementwiseDivergence_0_2::toMatrix( const std::shared_ptr< SparseMat mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/divergence/P2ToP1ElementwiseDivergence_0_2.hpp b/operators/divergence/P2ToP1ElementwiseDivergence_0_2.hpp index ebec476b..01a9a941 100644 --- a/operators/divergence/P2ToP1ElementwiseDivergence_0_2.hpp +++ b/operators/divergence/P2ToP1ElementwiseDivergence_0_2.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -74,53 +76,62 @@ class P2ToP1ElementwiseDivergence_0_2 : public Operator< P2Function< real_t >, P protected: private: - /// Kernel type: apply + /// Integral: P2ToP1ElementwiseDivergence_0_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 345 352 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ToP1ElementwiseDivergence_0_2_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ToP1ElementwiseDivergence_0_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 305 312 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ToP1ElementwiseDivergence_0_2_macro_3D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; }; } // namespace operatorgeneration diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp similarity index 99% rename from operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp rename to operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp index bd473d03..47c44040 100644 --- a/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp +++ b/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp similarity index 99% rename from operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp rename to operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp index 2ca6bcae..157bb143 100644 --- a/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp +++ b/operators/divergence/avx/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp rename to operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp index 96df5b0a..4324487f 100644 --- a/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp +++ b/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp similarity index 99% rename from operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp rename to operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp index ea48849b..fff3e8c5 100644 --- a/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp +++ b/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp similarity index 99% rename from operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp rename to operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp index 334eca5c..60540552 100644 --- a/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp +++ b/operators/divergence/avx/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp similarity index 99% rename from operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp rename to operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp index 9b6ca0f6..f0ee48b0 100644 --- a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp +++ b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_0::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_0::apply_P2ToP1ElementwiseDivergence_0_0_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp similarity index 99% rename from operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp rename to operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp index 94e211f5..7b2391e3 100644 --- a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp +++ b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_0::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_0::apply_P2ToP1ElementwiseDivergence_0_0_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp similarity index 99% rename from operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp rename to operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp index bcc2e8f2..9bed3fde 100644 --- a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp +++ b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_1::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_1::apply_P2ToP1ElementwiseDivergence_0_1_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp similarity index 99% rename from operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp rename to operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp index 4dcd5815..219f1791 100644 --- a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp +++ b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_1::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_1::apply_P2ToP1ElementwiseDivergence_0_1_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp similarity index 99% rename from operators/divergence/avx/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp rename to operators/divergence/avx/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp index 9a37b8db..abbde6d1 100644 --- a/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp +++ b/operators/divergence/avx/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_2::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_2::apply_P2ToP1ElementwiseDivergence_0_2_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp similarity index 98% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp index f0fc0ab3..e4242583 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_macro_2D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp similarity index 98% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_macro_2D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp index 3985fe5a..107518f4 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_macro_2D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ToP1ElementwiseDivergenceAnnulusMap_0_0::toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_0_macro_2D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp similarity index 98% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp index 2622ca33..ccd65bb8 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_macro_2D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::apply_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp similarity index 98% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_macro_2D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp index 1304883d..72c271f7 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_macro_2D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceAnnulusMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::toMatrix_macro_2D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ToP1ElementwiseDivergenceAnnulusMap_0_1::toMatrix_P2ToP1ElementwiseDivergenceAnnulusMap_0_1_macro_2D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp index 82535c45..c6e331b6 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_macro_3D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp index a2d064fb..20197c4c 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0::toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_0_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp similarity index 99% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp index ebb2f49c..ee1c6e1c 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_macro_3D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp similarity index 99% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp index 396cc539..c54ae33f 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1::toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_1_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp similarity index 99% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp index 2442c50d..0a08c166 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_macro_3D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::apply_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp similarity index 99% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp index 3fba24ce..f140d92f 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2::toMatrix_P2ToP1ElementwiseDivergenceIcosahedralShellMap_0_2_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp similarity index 98% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp index 0be5acbd..2f4fbfbd 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_macro_2D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_0::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_0::apply_P2ToP1ElementwiseDivergence_0_0_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp similarity index 99% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp index b6434286..acc9c0d1 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_macro_3D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_apply_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_0::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_0::apply_P2ToP1ElementwiseDivergence_0_0_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp similarity index 98% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_2D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp index a1694a9e..600e403c 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_2D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_0::toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_2D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp similarity index 99% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_3D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp index 71006702..73af2956 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_macro_3D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_0_toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_0::toMatrix_P2ToP1ElementwiseDivergence_0_0_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp similarity index 98% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp index 2057cab6..4f36c2a3 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_macro_2D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_1::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_1::apply_P2ToP1ElementwiseDivergence_0_1_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp similarity index 99% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp index ad720e9c..d957a1ed 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_macro_3D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_apply_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_1::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_1::apply_P2ToP1ElementwiseDivergence_0_1_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_2D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp similarity index 98% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_2D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp index a390a762..bbd6556c 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_2D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_1::toMatrix_macro_2D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_1::toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_2D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp similarity index 99% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_3D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp index 3ca38cd9..8d8310e2 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_macro_3D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_1_toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_1::toMatrix_P2ToP1ElementwiseDivergence_0_1_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp similarity index 99% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp index 390f8c57..e11280ec 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_apply_macro_3D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_apply_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_2::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_2::apply_P2ToP1ElementwiseDivergence_0_2_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_macro_3D.cpp b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp similarity index 99% rename from operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_macro_3D.cpp rename to operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp index 094ca101..391888d7 100644 --- a/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_macro_3D.cpp +++ b/operators/divergence/noarch/P2ToP1ElementwiseDivergence_0_2_toMatrix_P2ToP1ElementwiseDivergence_0_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseDivergence_0_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseDivergence_0_2::toMatrix_P2ToP1ElementwiseDivergence_0_2_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/epsilon/CMakeLists.txt b/operators/epsilon/CMakeLists.txt index ed34aa1e..9e6dfb23 100644 --- a/operators/epsilon/CMakeLists.txt +++ b/operators/epsilon/CMakeLists.txt @@ -49,108 +49,108 @@ add_library( opgen-epsilon if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY) target_sources(opgen-epsilon PRIVATE - avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp - avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp - avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp - avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp - avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp - avx/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp - avx/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp - avx/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp - avx/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_0_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_1_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_2_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_2_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_2_2_toMatrix_macro_3D.cpp + avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp + avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp + avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp + avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp + avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp + avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp + avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp + avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp + avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp + avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp + avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp + avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp + avx/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp + avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp + avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp + avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp + avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp + avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp + avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp + avx/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp + avx/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp + avx/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp + avx/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp + avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp + noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_2D.cpp + noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_3D.cpp + noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_2D.cpp + noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_3D.cpp + noarch/P2ElementwiseEpsilon_0_2_toMatrix_P2ElementwiseEpsilon_0_2_macro_3D.cpp + noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_2D.cpp + noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_3D.cpp + noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_2D.cpp + noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_3D.cpp + noarch/P2ElementwiseEpsilon_1_2_toMatrix_P2ElementwiseEpsilon_1_2_macro_3D.cpp + noarch/P2ElementwiseEpsilon_2_0_toMatrix_P2ElementwiseEpsilon_2_0_macro_3D.cpp + noarch/P2ElementwiseEpsilon_2_1_toMatrix_P2ElementwiseEpsilon_2_1_macro_3D.cpp + noarch/P2ElementwiseEpsilon_2_2_toMatrix_P2ElementwiseEpsilon_2_2_macro_3D.cpp ) set_source_files_properties( - avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp - avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp - avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp - avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp - avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp - avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp - avx/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp - avx/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp - avx/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp - avx/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp - avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp + avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp + avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp + avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp + avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp + avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp + avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp + avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp + avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp + avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp + avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp + avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp + avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp + avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp + avx/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp + avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp + avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp + avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp + avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp + avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp + avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp + avx/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp + avx/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp + avx/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp + avx/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp + avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS} ) @@ -161,68 +161,68 @@ else() target_sources(opgen-epsilon PRIVATE - noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp - noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp - noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp - noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp - noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp - noarch/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp - noarch/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilon_0_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp - noarch/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp - noarch/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilon_1_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilon_2_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilon_2_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp - noarch/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseEpsilon_2_2_toMatrix_macro_3D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp + noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp + noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp + noarch/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp + noarch/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp + noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp + noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp + noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_2D.cpp + noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_3D.cpp + noarch/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp + noarch/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp + noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_2D.cpp + noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_3D.cpp + noarch/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp + noarch/P2ElementwiseEpsilon_0_2_toMatrix_P2ElementwiseEpsilon_0_2_macro_3D.cpp + noarch/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp + noarch/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp + noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_2D.cpp + noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_3D.cpp + noarch/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp + noarch/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp + noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp + noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp + noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_2D.cpp + noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_3D.cpp + noarch/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp + noarch/P2ElementwiseEpsilon_1_2_toMatrix_P2ElementwiseEpsilon_1_2_macro_3D.cpp + noarch/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp + noarch/P2ElementwiseEpsilon_2_0_toMatrix_P2ElementwiseEpsilon_2_0_macro_3D.cpp + noarch/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp + noarch/P2ElementwiseEpsilon_2_1_toMatrix_P2ElementwiseEpsilon_2_1_macro_3D.cpp + noarch/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp + noarch/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp + noarch/P2ElementwiseEpsilon_2_2_toMatrix_P2ElementwiseEpsilon_2_2_macro_3D.cpp ) endif() diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.cpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.cpp index 19216374..c957ab96 100644 --- a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.cpp +++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.cpp @@ -149,7 +149,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -173,6 +173,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_0::apply( const P2Function< real_t >& src, refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -256,7 +257,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_0::toMatrix( const std::shared_ptr< Sparse this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -281,6 +282,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_0::toMatrix( const std::shared_ptr< Sparse refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } @@ -349,7 +351,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_0::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -371,6 +373,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_0::computeInverseDiagonalOperatorValues() refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.hpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.hpp index 8dc175a8..81612f44 100644 --- a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.hpp +++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -89,89 +91,103 @@ class P2ElementwiseEpsilonAnnulusMap_0_0 : public Operator< P2Function< real_t > protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilonAnnulusMap_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 756 1132 28 20 4 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseEpsilonAnnulusMap_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 720 1096 28 20 4 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseEpsilonAnnulusMap_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 546 916 28 20 4 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.cpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.cpp index 64e79f28..f95365a7 100644 --- a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.cpp +++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.cpp @@ -149,7 +149,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -173,6 +173,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_1::apply( const P2Function< real_t >& src, refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -256,7 +257,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_1::toMatrix( const std::shared_ptr< Sparse this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -281,6 +282,7 @@ void P2ElementwiseEpsilonAnnulusMap_0_1::toMatrix( const std::shared_ptr< Sparse refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.hpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.hpp index a14a2551..a702fc80 100644 --- a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.hpp +++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_0_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,63 +85,72 @@ class P2ElementwiseEpsilonAnnulusMap_0_1 : public Operator< P2Function< real_t > protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilonAnnulusMap_0_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 936 1192 28 20 4 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseEpsilonAnnulusMap_0_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 900 1156 28 20 4 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void toMatrix_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.cpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.cpp index 1dc47728..edd35d7b 100644 --- a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.cpp +++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.cpp @@ -149,7 +149,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -173,6 +173,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_0::apply( const P2Function< real_t >& src, refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -256,7 +257,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_0::toMatrix( const std::shared_ptr< Sparse this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -281,6 +282,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_0::toMatrix( const std::shared_ptr< Sparse refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.hpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.hpp index eba130ce..4d137d85 100644 --- a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.hpp +++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,63 +85,72 @@ class P2ElementwiseEpsilonAnnulusMap_1_0 : public Operator< P2Function< real_t > protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilonAnnulusMap_1_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 936 1192 28 20 4 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseEpsilonAnnulusMap_1_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 900 1156 28 20 4 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void toMatrix_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.cpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.cpp index e42063e8..3a421100 100644 --- a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.cpp +++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.cpp @@ -149,7 +149,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -173,6 +173,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_1::apply( const P2Function< real_t >& src, refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -256,7 +257,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_1::toMatrix( const std::shared_ptr< Sparse this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -281,6 +282,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_1::toMatrix( const std::shared_ptr< Sparse refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } @@ -349,7 +351,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_1::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -371,6 +373,7 @@ void P2ElementwiseEpsilonAnnulusMap_1_1::computeInverseDiagonalOperatorValues() refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.hpp b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.hpp index 8233c00e..8c85f610 100644 --- a/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.hpp +++ b/operators/epsilon/P2ElementwiseEpsilonAnnulusMap_1_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -89,89 +91,103 @@ class P2ElementwiseEpsilonAnnulusMap_1_1 : public Operator< P2Function< real_t > protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilonAnnulusMap_1_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 756 1132 28 20 4 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseEpsilonAnnulusMap_1_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 720 1096 28 20 4 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseEpsilonAnnulusMap_1_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 546 916 28 20 4 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_0.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_0.cpp index a65fdf3f..ad2f1db1 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_0.cpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_0.cpp @@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_0::apply( const P2Function< real_ this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_0::apply( const P2Function< real_ thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_0::toMatrix( const std::shared_pt this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_0::toMatrix( const std::shared_pt thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } @@ -401,7 +403,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_0::computeInverseDiagonalOperator this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -435,6 +437,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_0::computeInverseDiagonalOperator thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_0.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_0.hpp index d7ff98d6..28b2a406 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_0.hpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -89,125 +91,140 @@ class P2ElementwiseEpsilonIcosahedralShellMap_0_0 : public Operator< P2Function< protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3038 4397 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2938 4297 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2048 3397 66 15 5 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_1.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_1.cpp index 94573669..bb2a2f72 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_1.cpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_1.cpp @@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_1::apply( const P2Function< real_ this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_1::apply( const P2Function< real_ thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_1::toMatrix( const std::shared_pt this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_1::toMatrix( const std::shared_pt thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_1.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_1.hpp index 9dae9aad..f477f5d3 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_1.hpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,87 +85,96 @@ class P2ElementwiseEpsilonIcosahedralShellMap_0_1 : public Operator< P2Function< protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_0_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3583 4397 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_0_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3483 4297 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_2.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_2.cpp index bd6716ee..2bcf6f1f 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_2.cpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_2.cpp @@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_2::apply( const P2Function< real_ this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_2::apply( const P2Function< real_ thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_2::toMatrix( const std::shared_pt this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_0_2::toMatrix( const std::shared_pt thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_2.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_2.hpp index 91701d41..269b0394 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_2.hpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_0_2.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,87 +85,96 @@ class P2ElementwiseEpsilonIcosahedralShellMap_0_2 : public Operator< P2Function< protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_0_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3583 4397 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_0_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3483 4297 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_0.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_0.cpp index f4acec7b..0851bfd7 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_0.cpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_0.cpp @@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_0::apply( const P2Function< real_ this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_0::apply( const P2Function< real_ thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_0::toMatrix( const std::shared_pt this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_0::toMatrix( const std::shared_pt thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_0.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_0.hpp index 832bb4d7..26e41a15 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_0.hpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,87 +85,96 @@ class P2ElementwiseEpsilonIcosahedralShellMap_1_0 : public Operator< P2Function< protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_1_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3583 4397 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_1_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3483 4297 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_1.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_1.cpp index 39237dc7..03369a4c 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_1.cpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_1.cpp @@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_1::apply( const P2Function< real_ this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_1::apply( const P2Function< real_ thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_1::toMatrix( const std::shared_pt this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_1::toMatrix( const std::shared_pt thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } @@ -401,7 +403,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_1::computeInverseDiagonalOperator this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -435,6 +437,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_1::computeInverseDiagonalOperator thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_1.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_1.hpp index a22e0b98..9311dbc3 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_1.hpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -89,125 +91,140 @@ class P2ElementwiseEpsilonIcosahedralShellMap_1_1 : public Operator< P2Function< protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_1_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3038 4397 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_1_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2938 4297 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_1_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2048 3397 66 15 5 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_2.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_2.cpp index 4605790e..6b147ca6 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_2.cpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_2.cpp @@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_2::apply( const P2Function< real_ this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_2::apply( const P2Function< real_ thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_2::toMatrix( const std::shared_pt this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_1_2::toMatrix( const std::shared_pt thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_2.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_2.hpp index 5592492c..9e94c249 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_2.hpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_1_2.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,87 +85,96 @@ class P2ElementwiseEpsilonIcosahedralShellMap_1_2 : public Operator< P2Function< protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_1_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3583 4397 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_1_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3483 4297 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_0.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_0.cpp index e065d633..27352945 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_0.cpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_0.cpp @@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_0::apply( const P2Function< real_ this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_0::apply( const P2Function< real_ thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_0::toMatrix( const std::shared_pt this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_0::toMatrix( const std::shared_pt thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_0.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_0.hpp index 8ae0e66c..a2020310 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_0.hpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,87 +85,96 @@ class P2ElementwiseEpsilonIcosahedralShellMap_2_0 : public Operator< P2Function< protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_2_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3583 4397 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_2_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3483 4297 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_1.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_1.cpp index f5cc8cd0..92a66ed3 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_1.cpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_1.cpp @@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_1::apply( const P2Function< real_ this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_1::apply( const P2Function< real_ thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_1::toMatrix( const std::shared_pt this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_1::toMatrix( const std::shared_pt thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_1.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_1.hpp index 324284a6..db912ccf 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_1.hpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,87 +85,96 @@ class P2ElementwiseEpsilonIcosahedralShellMap_2_1 : public Operator< P2Function< protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_2_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3583 4397 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_2_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3483 4297 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_2.cpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_2.cpp index 18938aa2..8052ed92 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_2.cpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_2.cpp @@ -154,7 +154,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_2::apply( const P2Function< real_ this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_2::apply( const P2Function< real_ thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_2::toMatrix( const std::shared_pt this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_2::toMatrix( const std::shared_pt thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } @@ -401,7 +403,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_2::computeInverseDiagonalOperator this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -435,6 +437,7 @@ void P2ElementwiseEpsilonIcosahedralShellMap_2_2::computeInverseDiagonalOperator thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_2.hpp b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_2.hpp index 49b5755f..ce9ba70f 100644 --- a/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_2.hpp +++ b/operators/epsilon/P2ElementwiseEpsilonIcosahedralShellMap_2_2.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -89,125 +91,140 @@ class P2ElementwiseEpsilonIcosahedralShellMap_2_2 : public Operator< P2Function< protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_2_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3038 4397 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_2_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2938 4297 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseEpsilonIcosahedralShellMap_2_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2048 3397 66 15 5 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/epsilon/P2ElementwiseEpsilon_0_0.cpp b/operators/epsilon/P2ElementwiseEpsilon_0_0.cpp index a9ec5f85..0fdca154 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_0_0.cpp +++ b/operators/epsilon/P2ElementwiseEpsilon_0_0.cpp @@ -137,7 +137,7 @@ void P2ElementwiseEpsilon_0_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilon_0_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -159,6 +159,7 @@ void P2ElementwiseEpsilon_0_0::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -228,7 +229,7 @@ void P2ElementwiseEpsilon_0_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseEpsilon_0_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -244,6 +245,7 @@ void P2ElementwiseEpsilon_0_0::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -314,7 +316,7 @@ void P2ElementwiseEpsilon_0_0::toMatrix( const std::shared_ptr< SparseMatrixProx this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilon_0_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -337,6 +339,7 @@ void P2ElementwiseEpsilon_0_0::toMatrix( const std::shared_ptr< SparseMatrixProx mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -369,7 +372,7 @@ void P2ElementwiseEpsilon_0_0::toMatrix( const std::shared_ptr< SparseMatrixProx this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseEpsilon_0_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -386,6 +389,7 @@ void P2ElementwiseEpsilon_0_0::toMatrix( const std::shared_ptr< SparseMatrixProx mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -440,7 +444,7 @@ void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -460,6 +464,7 @@ void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -504,7 +509,7 @@ void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -518,6 +523,7 @@ void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/epsilon/P2ElementwiseEpsilon_0_0.hpp b/operators/epsilon/P2ElementwiseEpsilon_0_0.hpp index 0c72a0ab..2c51d0da 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_0_0.hpp +++ b/operators/epsilon/P2ElementwiseEpsilon_0_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -88,142 +90,171 @@ class P2ElementwiseEpsilon_0_0 : public Operator< P2Function< real_t >, P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilon_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 146 144 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ElementwiseEpsilon_0_0_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 449 436 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilon_0_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 110 108 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ElementwiseEpsilon_0_0_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 349 336 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseEpsilon_0_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 71 63 12 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 179 156 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/epsilon/P2ElementwiseEpsilon_0_1.cpp b/operators/epsilon/P2ElementwiseEpsilon_0_1.cpp index 96e1003e..e1c16d43 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_0_1.cpp +++ b/operators/epsilon/P2ElementwiseEpsilon_0_1.cpp @@ -137,7 +137,7 @@ void P2ElementwiseEpsilon_0_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilon_0_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -159,6 +159,7 @@ void P2ElementwiseEpsilon_0_1::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -228,7 +229,7 @@ void P2ElementwiseEpsilon_0_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseEpsilon_0_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -244,6 +245,7 @@ void P2ElementwiseEpsilon_0_1::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -314,7 +316,7 @@ void P2ElementwiseEpsilon_0_1::toMatrix( const std::shared_ptr< SparseMatrixProx this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilon_0_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -337,6 +339,7 @@ void P2ElementwiseEpsilon_0_1::toMatrix( const std::shared_ptr< SparseMatrixProx mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -369,7 +372,7 @@ void P2ElementwiseEpsilon_0_1::toMatrix( const std::shared_ptr< SparseMatrixProx this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseEpsilon_0_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -386,6 +389,7 @@ void P2ElementwiseEpsilon_0_1::toMatrix( const std::shared_ptr< SparseMatrixProx mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilon_0_1.hpp b/operators/epsilon/P2ElementwiseEpsilon_0_1.hpp index a27df5e5..78c96a47 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_0_1.hpp +++ b/operators/epsilon/P2ElementwiseEpsilon_0_1.hpp @@ -29,12 +29,14 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -82,100 +84,119 @@ class P2ElementwiseEpsilon_0_1 : public Operator< P2Function< real_t >, P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilon_0_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 191 189 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ElementwiseEpsilon_0_1_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_0_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 629 616 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilon_0_1_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_0_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 155 153 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ElementwiseEpsilon_0_1_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_0_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 529 516 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ElementwiseEpsilon_0_1_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilon_0_2.cpp b/operators/epsilon/P2ElementwiseEpsilon_0_2.cpp index 5a5ece8e..49e11fc2 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_0_2.cpp +++ b/operators/epsilon/P2ElementwiseEpsilon_0_2.cpp @@ -136,7 +136,7 @@ void P2ElementwiseEpsilon_0_2::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilon_0_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -158,6 +158,7 @@ void P2ElementwiseEpsilon_0_2::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -236,7 +237,7 @@ void P2ElementwiseEpsilon_0_2::toMatrix( const std::shared_ptr< SparseMatrixProx this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilon_0_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -259,6 +260,7 @@ void P2ElementwiseEpsilon_0_2::toMatrix( const std::shared_ptr< SparseMatrixProx mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilon_0_2.hpp b/operators/epsilon/P2ElementwiseEpsilon_0_2.hpp index 6d2a6616..ab6f49ec 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_0_2.hpp +++ b/operators/epsilon/P2ElementwiseEpsilon_0_2.hpp @@ -29,12 +29,14 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -82,59 +84,68 @@ class P2ElementwiseEpsilon_0_2 : public Operator< P2Function< real_t >, P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilon_0_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 629 616 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilon_0_2_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_0_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 529 516 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ElementwiseEpsilon_0_2_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilon_1_0.cpp b/operators/epsilon/P2ElementwiseEpsilon_1_0.cpp index 2381b5ed..666edf6b 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_1_0.cpp +++ b/operators/epsilon/P2ElementwiseEpsilon_1_0.cpp @@ -137,7 +137,7 @@ void P2ElementwiseEpsilon_1_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilon_1_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -159,6 +159,7 @@ void P2ElementwiseEpsilon_1_0::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -228,7 +229,7 @@ void P2ElementwiseEpsilon_1_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseEpsilon_1_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -244,6 +245,7 @@ void P2ElementwiseEpsilon_1_0::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -314,7 +316,7 @@ void P2ElementwiseEpsilon_1_0::toMatrix( const std::shared_ptr< SparseMatrixProx this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilon_1_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -337,6 +339,7 @@ void P2ElementwiseEpsilon_1_0::toMatrix( const std::shared_ptr< SparseMatrixProx mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -369,7 +372,7 @@ void P2ElementwiseEpsilon_1_0::toMatrix( const std::shared_ptr< SparseMatrixProx this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseEpsilon_1_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -386,6 +389,7 @@ void P2ElementwiseEpsilon_1_0::toMatrix( const std::shared_ptr< SparseMatrixProx mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilon_1_0.hpp b/operators/epsilon/P2ElementwiseEpsilon_1_0.hpp index 7ef99417..a7fdce7d 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_1_0.hpp +++ b/operators/epsilon/P2ElementwiseEpsilon_1_0.hpp @@ -29,12 +29,14 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -82,100 +84,119 @@ class P2ElementwiseEpsilon_1_0 : public Operator< P2Function< real_t >, P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilon_1_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 191 189 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ElementwiseEpsilon_1_0_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_1_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 629 616 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilon_1_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_1_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 155 153 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ElementwiseEpsilon_1_0_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_1_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 529 516 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ElementwiseEpsilon_1_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilon_1_1.cpp b/operators/epsilon/P2ElementwiseEpsilon_1_1.cpp index 3abb5d43..37551a93 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_1_1.cpp +++ b/operators/epsilon/P2ElementwiseEpsilon_1_1.cpp @@ -137,7 +137,7 @@ void P2ElementwiseEpsilon_1_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilon_1_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -159,6 +159,7 @@ void P2ElementwiseEpsilon_1_1::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -228,7 +229,7 @@ void P2ElementwiseEpsilon_1_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseEpsilon_1_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -244,6 +245,7 @@ void P2ElementwiseEpsilon_1_1::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -314,7 +316,7 @@ void P2ElementwiseEpsilon_1_1::toMatrix( const std::shared_ptr< SparseMatrixProx this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilon_1_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -337,6 +339,7 @@ void P2ElementwiseEpsilon_1_1::toMatrix( const std::shared_ptr< SparseMatrixProx mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -369,7 +372,7 @@ void P2ElementwiseEpsilon_1_1::toMatrix( const std::shared_ptr< SparseMatrixProx this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseEpsilon_1_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -386,6 +389,7 @@ void P2ElementwiseEpsilon_1_1::toMatrix( const std::shared_ptr< SparseMatrixProx mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -440,7 +444,7 @@ void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -460,6 +464,7 @@ void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -504,7 +509,7 @@ void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -518,6 +523,7 @@ void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/epsilon/P2ElementwiseEpsilon_1_1.hpp b/operators/epsilon/P2ElementwiseEpsilon_1_1.hpp index 13bcd9f6..db1ad8ea 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_1_1.hpp +++ b/operators/epsilon/P2ElementwiseEpsilon_1_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -88,142 +90,171 @@ class P2ElementwiseEpsilon_1_1 : public Operator< P2Function< real_t >, P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilon_1_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 146 144 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ElementwiseEpsilon_1_1_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_1_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 449 436 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilon_1_1_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_1_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 110 108 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ElementwiseEpsilon_1_1_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_1_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 349 336 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseEpsilon_1_1_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_1_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 71 63 12 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_1_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 179 156 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/epsilon/P2ElementwiseEpsilon_1_2.cpp b/operators/epsilon/P2ElementwiseEpsilon_1_2.cpp index a82145ad..229162c8 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_1_2.cpp +++ b/operators/epsilon/P2ElementwiseEpsilon_1_2.cpp @@ -136,7 +136,7 @@ void P2ElementwiseEpsilon_1_2::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilon_1_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -158,6 +158,7 @@ void P2ElementwiseEpsilon_1_2::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -236,7 +237,7 @@ void P2ElementwiseEpsilon_1_2::toMatrix( const std::shared_ptr< SparseMatrixProx this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilon_1_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -259,6 +260,7 @@ void P2ElementwiseEpsilon_1_2::toMatrix( const std::shared_ptr< SparseMatrixProx mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilon_1_2.hpp b/operators/epsilon/P2ElementwiseEpsilon_1_2.hpp index 061fe534..cc82cde0 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_1_2.hpp +++ b/operators/epsilon/P2ElementwiseEpsilon_1_2.hpp @@ -29,12 +29,14 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -82,59 +84,68 @@ class P2ElementwiseEpsilon_1_2 : public Operator< P2Function< real_t >, P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilon_1_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 629 616 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilon_1_2_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_1_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 529 516 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ElementwiseEpsilon_1_2_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilon_2_0.cpp b/operators/epsilon/P2ElementwiseEpsilon_2_0.cpp index 88fcdb37..42e1e428 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_2_0.cpp +++ b/operators/epsilon/P2ElementwiseEpsilon_2_0.cpp @@ -136,7 +136,7 @@ void P2ElementwiseEpsilon_2_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilon_2_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -158,6 +158,7 @@ void P2ElementwiseEpsilon_2_0::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -236,7 +237,7 @@ void P2ElementwiseEpsilon_2_0::toMatrix( const std::shared_ptr< SparseMatrixProx this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilon_2_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -259,6 +260,7 @@ void P2ElementwiseEpsilon_2_0::toMatrix( const std::shared_ptr< SparseMatrixProx mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilon_2_0.hpp b/operators/epsilon/P2ElementwiseEpsilon_2_0.hpp index 7313eff9..bfb70f48 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_2_0.hpp +++ b/operators/epsilon/P2ElementwiseEpsilon_2_0.hpp @@ -29,12 +29,14 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -82,59 +84,68 @@ class P2ElementwiseEpsilon_2_0 : public Operator< P2Function< real_t >, P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilon_2_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 629 616 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilon_2_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_2_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 529 516 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ElementwiseEpsilon_2_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilon_2_1.cpp b/operators/epsilon/P2ElementwiseEpsilon_2_1.cpp index 2e531a7c..4f5d19bc 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_2_1.cpp +++ b/operators/epsilon/P2ElementwiseEpsilon_2_1.cpp @@ -136,7 +136,7 @@ void P2ElementwiseEpsilon_2_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilon_2_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -158,6 +158,7 @@ void P2ElementwiseEpsilon_2_1::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -236,7 +237,7 @@ void P2ElementwiseEpsilon_2_1::toMatrix( const std::shared_ptr< SparseMatrixProx this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilon_2_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -259,6 +260,7 @@ void P2ElementwiseEpsilon_2_1::toMatrix( const std::shared_ptr< SparseMatrixProx mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/epsilon/P2ElementwiseEpsilon_2_1.hpp b/operators/epsilon/P2ElementwiseEpsilon_2_1.hpp index 9ab103ad..7487ce1a 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_2_1.hpp +++ b/operators/epsilon/P2ElementwiseEpsilon_2_1.hpp @@ -29,12 +29,14 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -82,59 +84,68 @@ class P2ElementwiseEpsilon_2_1 : public Operator< P2Function< real_t >, P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilon_2_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 629 616 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilon_2_1_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_2_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 529 516 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ElementwiseEpsilon_2_1_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > mu; }; diff --git a/operators/epsilon/P2ElementwiseEpsilon_2_2.cpp b/operators/epsilon/P2ElementwiseEpsilon_2_2.cpp index 257b6228..74d1a338 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_2_2.cpp +++ b/operators/epsilon/P2ElementwiseEpsilon_2_2.cpp @@ -136,7 +136,7 @@ void P2ElementwiseEpsilon_2_2::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseEpsilon_2_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -158,6 +158,7 @@ void P2ElementwiseEpsilon_2_2::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -236,7 +237,7 @@ void P2ElementwiseEpsilon_2_2::toMatrix( const std::shared_ptr< SparseMatrixProx this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseEpsilon_2_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -259,6 +260,7 @@ void P2ElementwiseEpsilon_2_2::toMatrix( const std::shared_ptr< SparseMatrixProx mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -321,7 +323,7 @@ void P2ElementwiseEpsilon_2_2::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -341,6 +343,7 @@ void P2ElementwiseEpsilon_2_2::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/epsilon/P2ElementwiseEpsilon_2_2.hpp b/operators/epsilon/P2ElementwiseEpsilon_2_2.hpp index c6677d45..444c1266 100644 --- a/operators/epsilon/P2ElementwiseEpsilon_2_2.hpp +++ b/operators/epsilon/P2ElementwiseEpsilon_2_2.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -88,83 +90,97 @@ class P2ElementwiseEpsilon_2_2 : public Operator< P2Function< real_t >, P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseEpsilon_2_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 449 436 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseEpsilon_2_2_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_2_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 349 336 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseEpsilon_2_2_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseEpsilon_2_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 179 156 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp index ce842b48..b47e68a3 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_0_0::apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp index ef8e4674..bbe9480e 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp index bd671a16..d9cd38d5 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_0_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_0_1::apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp index b84cbeab..59e329a7 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_1_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_1_0::apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp index 56b89e6e..3baa7447 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_1_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_1_1::apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp index b4709967..3a670082 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp index 48a9372c..396e50cb 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_0_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_0_0::apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp index 43f779ee..491f16c8 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_0_0::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp index 9cd33c41..46604aa5 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_0_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_0_1::apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp index 4c6a2797..d95e76e4 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_0_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_0_2::apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp index 7b297955..d6e58a78 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_1_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_1_0::apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp index 598674e8..689f4fe7 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_1_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_1_1::apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp index 005e2796..3f3d7eb3 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_1_1::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp index bafa0faf..4ec48e9b 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_1_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_1_2::apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp index 17d106ef..878c913b 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_2_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_2_0::apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp index 66e3853b..a8829f8f 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_2_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_2_1::apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp index 9c538b03..7d75a6ae 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_2_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_2_2::apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp index 012da950..112c7d97 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_2_2::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_2_2::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp index 42313668..63d3593b 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_0::apply_P2ElementwiseEpsilon_0_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp index a8bd0968..36f61a5e 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_0::apply_P2ElementwiseEpsilon_0_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp index 32acd0de..9b7c8917 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp index 2313d32f..21a76a0d 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp index e991c681..bc7617f3 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_1::apply_P2ElementwiseEpsilon_0_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp index 729b987d..8e91b3bc 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_1::apply_P2ElementwiseEpsilon_0_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp index fdc396b2..71be2bfd 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_2::apply_P2ElementwiseEpsilon_0_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp index 228462fe..49b83fbe 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_0::apply_P2ElementwiseEpsilon_1_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp index 64c043ff..3494ba7f 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_0::apply_P2ElementwiseEpsilon_1_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp index a89339ae..6df91095 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_1::apply_P2ElementwiseEpsilon_1_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp index 76f79e5e..ae126d2a 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_1::apply_P2ElementwiseEpsilon_1_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp index d9e7eb50..b55ea264 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp index e6320861..11c5733e 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp index 13ddacc4..86cf0c46 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_2::apply_P2ElementwiseEpsilon_1_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp index 55827d3e..067d5ebd 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_2_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_2_0::apply_P2ElementwiseEpsilon_2_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp index 1da51cbd..872f66db 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_2_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_2_1::apply_P2ElementwiseEpsilon_2_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp index cfb3b76f..41c47dd7 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_2_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_2_2::apply_P2ElementwiseEpsilon_2_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/epsilon/avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp index af6887f0..bfbc9bdc 100644 --- a/operators/epsilon/avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/epsilon/avx/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_2_2::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_2_2::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp similarity index 98% rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp index 635fcc9a..8859de5a 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_0_0::apply_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp similarity index 98% rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp index f624a381..7023d510 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp similarity index 98% rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp index 45eb4ac0..f88bffdc 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_0_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_0_0::toMatrix_P2ElementwiseEpsilonAnnulusMap_0_0_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp index b0ba0ca1..b6cdec48 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_0_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_0_1::apply_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp index 6368a651..ef2f5199 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_0_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_0_1::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_0_1::toMatrix_P2ElementwiseEpsilonAnnulusMap_0_1_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp index 39c9b043..d46b1131 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_1_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_1_0::apply_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp index f5a609f8..17e02897 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_0_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_1_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_1_0::toMatrix_P2ElementwiseEpsilonAnnulusMap_1_0_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp similarity index 98% rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp index b6cfddab..9ebafa72 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_1_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_1_1::apply_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp similarity index 98% rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp index 50bb3db1..2a2c83dc 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp similarity index 98% rename from operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp index ae3d2188..4ff5f3ce 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonAnnulusMap_1_1_toMatrix_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonAnnulusMap_1_1::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseEpsilonAnnulusMap_1_1::toMatrix_P2ElementwiseEpsilonAnnulusMap_1_1_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp index 8d3f4915..d948db60 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_0_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_0_0::apply_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp index 0429ae90..14a150f5 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_0_0::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp index 6e00075f..24630d2c 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_0_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_0_0::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp index 9c2f8b13..0d6cc152 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_0_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_0_1::apply_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp index 9746d238..cf07447d 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_0_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_0_1::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_1_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp index 40e85e49..093daae6 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_0_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_0_2::apply_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp index 2d1ae586..933f47e1 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_0_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_0_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_0_2::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_0_2_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp index a3c8f831..8ec79a9f 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_1_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_1_0::apply_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp index 6ea180e2..35c9f7c5 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_1_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_1_0::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp index b8fa2f15..92ad7a47 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_1_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_1_1::apply_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp index d6ed3aaa..a2803b26 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_1_1::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp index 913751dd..6b80cdad 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_1_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_1_1::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_1_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp index 1d04f1a4..f1756dbf 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_1_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_1_2::apply_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp index 4a33acf8..d44e9560 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_1_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_1_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_1_2::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_1_2_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp index 19f5036b..59e82034 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_2_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_2_0::apply_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp index 246bf4b2..4472c135 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_0_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_2_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_2_0::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp index 4839662b..3697fa5a 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_2_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_2_1::apply_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp index ac3e7fe1..f6eaedfd 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_1_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_2_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_2_1::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_1_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp index f35664d0..81b2f338 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_2_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_2_2::apply_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp index 995e8d7d..5c8cadef 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_2_2::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_2_2::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp index 48ccdc57..4a401e54 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilonIcosahedralShellMap_2_2_toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilonIcosahedralShellMap_2_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseEpsilonIcosahedralShellMap_2_2::toMatrix_P2ElementwiseEpsilonIcosahedralShellMap_2_2_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp index a8a5ea94..e7f157ff 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_0::apply_P2ElementwiseEpsilon_0_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp index 6a6c83f8..48e29e4a 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_apply_P2ElementwiseEpsilon_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_0::apply_P2ElementwiseEpsilon_0_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp index 29960423..03bb47dd 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp index 5a6d6a11..d5c47673 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_0_0_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_2D.cpp index 196b0631..f5a61bc6 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_0::toMatrix_P2ElementwiseEpsilon_0_0_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_3D.cpp index 75e2a24b..e380f4ef 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_0_toMatrix_P2ElementwiseEpsilon_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_0::toMatrix_P2ElementwiseEpsilon_0_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp index b43c2532..5b6f7087 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_2D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_1::apply_P2ElementwiseEpsilon_0_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp index 377700e3..674d6dbb 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_apply_P2ElementwiseEpsilon_0_1_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_1::apply_P2ElementwiseEpsilon_0_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_2D.cpp index f112ea9a..5c9266e1 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_2D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_1::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_1::toMatrix_P2ElementwiseEpsilon_0_1_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_3D.cpp index 5df17aeb..79008336 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_1_toMatrix_P2ElementwiseEpsilon_0_1_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_1::toMatrix_P2ElementwiseEpsilon_0_1_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp index 833d6080..178addd6 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_apply_P2ElementwiseEpsilon_0_2_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_2::apply_P2ElementwiseEpsilon_0_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_toMatrix_P2ElementwiseEpsilon_0_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_toMatrix_P2ElementwiseEpsilon_0_2_macro_3D.cpp index c0a3819d..81d08aec 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_0_2_toMatrix_P2ElementwiseEpsilon_0_2_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_0_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_0_2::toMatrix_P2ElementwiseEpsilon_0_2_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp index 626c2765..9c1b8a28 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_2D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_0::apply_P2ElementwiseEpsilon_1_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp index ce752ff6..60cf4d51 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_apply_P2ElementwiseEpsilon_1_0_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_0::apply_P2ElementwiseEpsilon_1_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_2D.cpp index e6b12c78..85f92d5e 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_2D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_0::toMatrix_P2ElementwiseEpsilon_1_0_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_3D.cpp index ff45b435..ec5f7bea 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_0_toMatrix_P2ElementwiseEpsilon_1_0_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_0::toMatrix_P2ElementwiseEpsilon_1_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp index eedbea32..39298ace 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_1::apply_P2ElementwiseEpsilon_1_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp index d449ae6a..026cd8dd 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_apply_P2ElementwiseEpsilon_1_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_1::apply_P2ElementwiseEpsilon_1_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp index 91d9e9ca..cea03d04 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp index 883735ab..0a111455 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_1_1_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_2D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_2D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_2D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_2D.cpp index 79549f47..a58872ea 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_2D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_1::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_1::toMatrix_P2ElementwiseEpsilon_1_1_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_3D.cpp index aff549f4..1b08642e 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_1_toMatrix_P2ElementwiseEpsilon_1_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_1::toMatrix_P2ElementwiseEpsilon_1_1_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp index b3b27120..49e96a45 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_apply_P2ElementwiseEpsilon_1_2_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_2::apply_P2ElementwiseEpsilon_1_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_toMatrix_P2ElementwiseEpsilon_1_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_toMatrix_P2ElementwiseEpsilon_1_2_macro_3D.cpp index 8772aa7b..09b18c98 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_1_2_toMatrix_P2ElementwiseEpsilon_1_2_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_1_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_1_2::toMatrix_P2ElementwiseEpsilon_1_2_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp index b0dcd426..7f3b1f1f 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_apply_P2ElementwiseEpsilon_2_0_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_2_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_2_0::apply_P2ElementwiseEpsilon_2_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_toMatrix_P2ElementwiseEpsilon_2_0_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_toMatrix_P2ElementwiseEpsilon_2_0_macro_3D.cpp index 97ade253..d66a4e3f 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_0_toMatrix_P2ElementwiseEpsilon_2_0_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_2_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_2_0::toMatrix_P2ElementwiseEpsilon_2_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp index fd8f4f22..b0e72a02 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_apply_P2ElementwiseEpsilon_2_1_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_2_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_2_1::apply_P2ElementwiseEpsilon_2_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_toMatrix_P2ElementwiseEpsilon_2_1_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_toMatrix_P2ElementwiseEpsilon_2_1_macro_3D.cpp index f8c35e99..ece95813 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_1_toMatrix_P2ElementwiseEpsilon_2_1_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_2_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_2_1::toMatrix_P2ElementwiseEpsilon_2_1_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp index 443f2405..4058126a 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_apply_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_apply_P2ElementwiseEpsilon_2_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_2_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_2_2::apply_P2ElementwiseEpsilon_2_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp index f243bfa0..74288889 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_2_2::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_2_2::computeInverseDiagonalOperatorValues_P2ElementwiseEpsilon_2_2_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_toMatrix_macro_3D.cpp b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_toMatrix_P2ElementwiseEpsilon_2_2_macro_3D.cpp similarity index 99% rename from operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_toMatrix_macro_3D.cpp rename to operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_toMatrix_P2ElementwiseEpsilon_2_2_macro_3D.cpp index 30cc56a5..9801afb4 100644 --- a/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_toMatrix_macro_3D.cpp +++ b/operators/epsilon/noarch/P2ElementwiseEpsilon_2_2_toMatrix_P2ElementwiseEpsilon_2_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseEpsilon_2_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseEpsilon_2_2::toMatrix_P2ElementwiseEpsilon_2_2_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/CMakeLists.txt b/operators/full_stokes/CMakeLists.txt index cd9d5f1b..4e9d365a 100644 --- a/operators/full_stokes/CMakeLists.txt +++ b/operators/full_stokes/CMakeLists.txt @@ -49,108 +49,108 @@ add_library( opgen-full_stokes if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY) target_sources(opgen-full_stokes PRIVATE - avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp - avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp - avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp - avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp - avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp - avx/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp - avx/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp - avx/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp - avx/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_0_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_1_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_2_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_2_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_2_2_toMatrix_macro_3D.cpp + avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp + avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp + avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp + avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp + avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp + avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp + avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp + avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp + avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp + avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp + avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp + avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp + avx/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp + avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp + avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp + avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp + avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp + avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp + avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp + avx/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp + avx/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp + avx/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp + avx/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp + avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp + noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_2D.cpp + noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_3D.cpp + noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_2D.cpp + noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_3D.cpp + noarch/P2ElementwiseFullStokes_0_2_toMatrix_P2ElementwiseFullStokes_0_2_macro_3D.cpp + noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_2D.cpp + noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_3D.cpp + noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_2D.cpp + noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_3D.cpp + noarch/P2ElementwiseFullStokes_1_2_toMatrix_P2ElementwiseFullStokes_1_2_macro_3D.cpp + noarch/P2ElementwiseFullStokes_2_0_toMatrix_P2ElementwiseFullStokes_2_0_macro_3D.cpp + noarch/P2ElementwiseFullStokes_2_1_toMatrix_P2ElementwiseFullStokes_2_1_macro_3D.cpp + noarch/P2ElementwiseFullStokes_2_2_toMatrix_P2ElementwiseFullStokes_2_2_macro_3D.cpp ) set_source_files_properties( - avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp - avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp - avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp - avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp - avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp - avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp - avx/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp - avx/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp - avx/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp - avx/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp - avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp + avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp + avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp + avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp + avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp + avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp + avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp + avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp + avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp + avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp + avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp + avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp + avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp + avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp + avx/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp + avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp + avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp + avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp + avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp + avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp + avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp + avx/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp + avx/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp + avx/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp + avx/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp + avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS} ) @@ -161,68 +161,68 @@ else() target_sources(opgen-full_stokes PRIVATE - noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp - noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp - noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp - noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp - noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp - noarch/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp - noarch/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokes_0_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp - noarch/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp - noarch/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_2D.cpp - noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokes_1_2_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokes_2_0_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokes_2_1_toMatrix_macro_3D.cpp - noarch/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp - noarch/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseFullStokes_2_2_toMatrix_macro_3D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp + noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp + noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp + noarch/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp + noarch/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp + noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp + noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp + noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_2D.cpp + noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_3D.cpp + noarch/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp + noarch/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp + noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_2D.cpp + noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_3D.cpp + noarch/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp + noarch/P2ElementwiseFullStokes_0_2_toMatrix_P2ElementwiseFullStokes_0_2_macro_3D.cpp + noarch/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp + noarch/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp + noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_2D.cpp + noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_3D.cpp + noarch/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp + noarch/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp + noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp + noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp + noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_2D.cpp + noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_3D.cpp + noarch/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp + noarch/P2ElementwiseFullStokes_1_2_toMatrix_P2ElementwiseFullStokes_1_2_macro_3D.cpp + noarch/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp + noarch/P2ElementwiseFullStokes_2_0_toMatrix_P2ElementwiseFullStokes_2_0_macro_3D.cpp + noarch/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp + noarch/P2ElementwiseFullStokes_2_1_toMatrix_P2ElementwiseFullStokes_2_1_macro_3D.cpp + noarch/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp + noarch/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp + noarch/P2ElementwiseFullStokes_2_2_toMatrix_P2ElementwiseFullStokes_2_2_macro_3D.cpp ) endif() diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.cpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.cpp index 72dfac44..58031f2c 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.cpp @@ -149,7 +149,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_0::apply( const P2Function< real_t >& s this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -173,6 +173,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_0::apply( const P2Function< real_t >& s refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -256,7 +257,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_0::toMatrix( const std::shared_ptr< Spa this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -281,6 +282,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_0::toMatrix( const std::shared_ptr< Spa refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } @@ -349,7 +351,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_0::computeInverseDiagonalOperatorValues this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -371,6 +373,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_0::computeInverseDiagonalOperatorValues refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.hpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.hpp index 579eb8f0..2117e368 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -99,89 +101,104 @@ class P2ElementwiseFullStokesAnnulusMap_0_0 : public Operator< P2Function< real_ protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokesAnnulusMap_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 888 1304 28 20 4 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseFullStokesAnnulusMap_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 852 1268 28 20 4 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseFullStokesAnnulusMap_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 618 968 28 20 4 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.cpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.cpp index efbb3d8b..7127fc85 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.cpp @@ -149,7 +149,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_1::apply( const P2Function< real_t >& s this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -173,6 +173,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_1::apply( const P2Function< real_t >& s refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -256,7 +257,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_1::toMatrix( const std::shared_ptr< Spa this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -281,6 +282,7 @@ void P2ElementwiseFullStokesAnnulusMap_0_1::toMatrix( const std::shared_ptr< Spa refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.hpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.hpp index 5c457c8d..6a380c6e 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_0_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -93,63 +95,72 @@ class P2ElementwiseFullStokesAnnulusMap_0_1 : public Operator< P2Function< real_ protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokesAnnulusMap_0_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1128 1452 28 20 4 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseFullStokesAnnulusMap_0_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1092 1416 28 20 4 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void toMatrix_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.cpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.cpp index e701b20c..b618a343 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.cpp @@ -149,7 +149,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_0::apply( const P2Function< real_t >& s this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -173,6 +173,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_0::apply( const P2Function< real_t >& s refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -256,7 +257,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_0::toMatrix( const std::shared_ptr< Spa this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -281,6 +282,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_0::toMatrix( const std::shared_ptr< Spa refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.hpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.hpp index 299e3fe3..b6a47058 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -93,63 +95,72 @@ class P2ElementwiseFullStokesAnnulusMap_1_0 : public Operator< P2Function< real_ protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokesAnnulusMap_1_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1128 1456 28 20 4 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseFullStokesAnnulusMap_1_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1092 1420 28 20 4 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void toMatrix_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.cpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.cpp index 663ef74a..99de0a35 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.cpp @@ -149,7 +149,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_1::apply( const P2Function< real_t >& s this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -173,6 +173,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_1::apply( const P2Function< real_t >& s refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -256,7 +257,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_1::toMatrix( const std::shared_ptr< Spa this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -281,6 +282,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_1::toMatrix( const std::shared_ptr< Spa refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } @@ -349,7 +351,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_1::computeInverseDiagonalOperatorValues this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -371,6 +373,7 @@ void P2ElementwiseFullStokesAnnulusMap_1_1::computeInverseDiagonalOperatorValues refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.hpp b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.hpp index 61485efc..01025b15 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokesAnnulusMap_1_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -99,89 +101,104 @@ class P2ElementwiseFullStokesAnnulusMap_1_1 : public Operator< P2Function< real_ protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokesAnnulusMap_1_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 888 1304 28 20 4 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseFullStokesAnnulusMap_1_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 852 1268 28 20 4 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseFullStokesAnnulusMap_1_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 618 968 28 20 4 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_0.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_0.cpp index 48536c31..2ced3fd9 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_0.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_0.cpp @@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_0::apply( const P2Function< re this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_0::apply( const P2Function< re thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_0::toMatrix( const std::shared this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_0::toMatrix( const std::shared thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } @@ -401,7 +403,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_0::computeInverseDiagonalOpera this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -435,6 +437,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_0::computeInverseDiagonalOpera thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_0.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_0.hpp index df0a75c6..a32d7e1c 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_0.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -99,125 +101,140 @@ class P2ElementwiseFullStokesIcosahedralShellMap_0_0 : public Operator< P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3513 5177 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3413 5077 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2298 3592 66 15 5 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_1.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_1.cpp index da66b821..3d449445 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_1.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_1.cpp @@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_1::apply( const P2Function< re this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_1::apply( const P2Function< re thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_1::toMatrix( const std::shared this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_1::toMatrix( const std::shared thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_1.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_1.hpp index 7b524949..07133b03 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_1.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -93,87 +95,96 @@ class P2ElementwiseFullStokesIcosahedralShellMap_0_1 : public Operator< P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_0_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 4283 5397 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_0_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 4183 5297 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_2.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_2.cpp index 3f5efa0b..ff66e6ea 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_2.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_2.cpp @@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_2::apply( const P2Function< re this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_2::apply( const P2Function< re thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_2::toMatrix( const std::shared this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_0_2::toMatrix( const std::shared thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_2.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_2.hpp index 3f5c1ae6..5e2cfe06 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_2.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_0_2.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -93,87 +95,96 @@ class P2ElementwiseFullStokesIcosahedralShellMap_0_2 : public Operator< P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_0_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 4283 5392 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_0_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 4183 5292 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_0.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_0.cpp index 8f1aebff..2b4f9a3a 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_0.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_0.cpp @@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_0::apply( const P2Function< re this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_0::apply( const P2Function< re thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_0::toMatrix( const std::shared this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_0::toMatrix( const std::shared thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_0.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_0.hpp index c0c06628..283daa25 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_0.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -93,87 +95,96 @@ class P2ElementwiseFullStokesIcosahedralShellMap_1_0 : public Operator< P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_1_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 4283 5387 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_1_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 4183 5287 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_1.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_1.cpp index 65de5632..fb682439 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_1.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_1.cpp @@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_1::apply( const P2Function< re this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_1::apply( const P2Function< re thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_1::toMatrix( const std::shared this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_1::toMatrix( const std::shared thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } @@ -401,7 +403,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_1::computeInverseDiagonalOpera this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -435,6 +437,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_1::computeInverseDiagonalOpera thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_1.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_1.hpp index b07be550..69319c20 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_1.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -99,125 +101,140 @@ class P2ElementwiseFullStokesIcosahedralShellMap_1_1 : public Operator< P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_1_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3513 5187 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_1_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3413 5087 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_1_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2298 3602 66 15 5 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_2.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_2.cpp index 93197932..89f9ae24 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_2.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_2.cpp @@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_2::apply( const P2Function< re this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_2::apply( const P2Function< re thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_2::toMatrix( const std::shared this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_1_2::toMatrix( const std::shared thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_2.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_2.hpp index 12b40740..3949dc64 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_2.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_1_2.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -93,87 +95,96 @@ class P2ElementwiseFullStokesIcosahedralShellMap_1_2 : public Operator< P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_1_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 4283 5392 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_1_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 4183 5292 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_0.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_0.cpp index 9b9e077c..2b12e9a4 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_0.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_0.cpp @@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_0::apply( const P2Function< re this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_0::apply( const P2Function< re thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_0::toMatrix( const std::shared this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_0::toMatrix( const std::shared thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_0.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_0.hpp index e7f89bd7..66b9c464 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_0.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -93,87 +95,96 @@ class P2ElementwiseFullStokesIcosahedralShellMap_2_0 : public Operator< P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_2_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 4283 5387 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_2_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 4183 5287 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_1.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_1.cpp index 33789ed1..b8c0e5e9 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_1.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_1.cpp @@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_1::apply( const P2Function< re this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_1::apply( const P2Function< re thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_1::toMatrix( const std::shared this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_1::toMatrix( const std::shared thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_1.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_1.hpp index d758b382..d584d34c 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_1.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -93,87 +95,96 @@ class P2ElementwiseFullStokesIcosahedralShellMap_2_1 : public Operator< P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_2_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 4283 5387 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_2_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 4183 5287 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_2.cpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_2.cpp index 1ed1ee4b..f3b67bf0 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_2.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_2.cpp @@ -154,7 +154,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_2::apply( const P2Function< re this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -190,6 +190,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_2::apply( const P2Function< re thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -285,7 +286,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_2::toMatrix( const std::shared this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -322,6 +323,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_2::toMatrix( const std::shared thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } @@ -401,7 +403,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_2::computeInverseDiagonalOpera this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -435,6 +437,7 @@ void P2ElementwiseFullStokesIcosahedralShellMap_2_2::computeInverseDiagonalOpera thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_2.hpp b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_2.hpp index 56e9fc5b..0288c626 100644 --- a/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_2.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokesIcosahedralShellMap_2_2.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -99,125 +101,140 @@ class P2ElementwiseFullStokesIcosahedralShellMap_2_2 : public Operator< P2Functi protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_2_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3513 5182 66 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_2_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 3413 5082 66 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseFullStokesIcosahedralShellMap_2_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2298 3597 66 15 5 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/full_stokes/P2ElementwiseFullStokes_0_0.cpp b/operators/full_stokes/P2ElementwiseFullStokes_0_0.cpp index e1c98782..928dd7c8 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_0_0.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_0_0.cpp @@ -137,7 +137,7 @@ void P2ElementwiseFullStokes_0_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokes_0_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -159,6 +159,7 @@ void P2ElementwiseFullStokes_0_0::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -228,7 +229,7 @@ void P2ElementwiseFullStokes_0_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseFullStokes_0_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -244,6 +245,7 @@ void P2ElementwiseFullStokes_0_0::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -314,7 +316,7 @@ void P2ElementwiseFullStokes_0_0::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokes_0_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -337,6 +339,7 @@ void P2ElementwiseFullStokes_0_0::toMatrix( const std::shared_ptr< SparseMatrixP mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -369,7 +372,7 @@ void P2ElementwiseFullStokes_0_0::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseFullStokes_0_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -386,6 +389,7 @@ void P2ElementwiseFullStokes_0_0::toMatrix( const std::shared_ptr< SparseMatrixP mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -440,7 +444,7 @@ void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -460,6 +464,7 @@ void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -504,7 +509,7 @@ void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -518,6 +523,7 @@ void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/full_stokes/P2ElementwiseFullStokes_0_0.hpp b/operators/full_stokes/P2ElementwiseFullStokes_0_0.hpp index 63e4e09a..df1b1b4f 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_0_0.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_0_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -98,142 +100,173 @@ class P2ElementwiseFullStokes_0_0 : public Operator< P2Function< real_t >, P2Fun protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokes_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 146 144 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ElementwiseFullStokes_0_0_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 449 436 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokes_0_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 110 108 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ElementwiseFullStokes_0_0_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 349 336 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseFullStokes_0_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 71 63 12 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 179 156 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/full_stokes/P2ElementwiseFullStokes_0_1.cpp b/operators/full_stokes/P2ElementwiseFullStokes_0_1.cpp index a9cc1381..8d55c414 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_0_1.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_0_1.cpp @@ -137,7 +137,7 @@ void P2ElementwiseFullStokes_0_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokes_0_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -159,6 +159,7 @@ void P2ElementwiseFullStokes_0_1::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -228,7 +229,7 @@ void P2ElementwiseFullStokes_0_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseFullStokes_0_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -244,6 +245,7 @@ void P2ElementwiseFullStokes_0_1::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -314,7 +316,7 @@ void P2ElementwiseFullStokes_0_1::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokes_0_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -337,6 +339,7 @@ void P2ElementwiseFullStokes_0_1::toMatrix( const std::shared_ptr< SparseMatrixP mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -369,7 +372,7 @@ void P2ElementwiseFullStokes_0_1::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseFullStokes_0_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -386,6 +389,7 @@ void P2ElementwiseFullStokes_0_1::toMatrix( const std::shared_ptr< SparseMatrixP mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokes_0_1.hpp b/operators/full_stokes/P2ElementwiseFullStokes_0_1.hpp index b626c4d7..2383334d 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_0_1.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_0_1.hpp @@ -29,12 +29,14 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -92,100 +94,119 @@ class P2ElementwiseFullStokes_0_1 : public Operator< P2Function< real_t >, P2Fun protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokes_0_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 191 189 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ElementwiseFullStokes_0_1_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_0_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 629 616 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokes_0_1_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_0_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 155 153 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ElementwiseFullStokes_0_1_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_0_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 529 516 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ElementwiseFullStokes_0_1_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokes_0_2.cpp b/operators/full_stokes/P2ElementwiseFullStokes_0_2.cpp index ddfb13ea..9f8044a7 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_0_2.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_0_2.cpp @@ -136,7 +136,7 @@ void P2ElementwiseFullStokes_0_2::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokes_0_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -158,6 +158,7 @@ void P2ElementwiseFullStokes_0_2::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -236,7 +237,7 @@ void P2ElementwiseFullStokes_0_2::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokes_0_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -259,6 +260,7 @@ void P2ElementwiseFullStokes_0_2::toMatrix( const std::shared_ptr< SparseMatrixP mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokes_0_2.hpp b/operators/full_stokes/P2ElementwiseFullStokes_0_2.hpp index 37c3f570..bcd3a4ac 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_0_2.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_0_2.hpp @@ -29,12 +29,14 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -92,59 +94,68 @@ class P2ElementwiseFullStokes_0_2 : public Operator< P2Function< real_t >, P2Fun protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokes_0_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 629 616 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokes_0_2_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_0_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 529 516 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ElementwiseFullStokes_0_2_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokes_1_0.cpp b/operators/full_stokes/P2ElementwiseFullStokes_1_0.cpp index 951259bc..044e64af 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_1_0.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_1_0.cpp @@ -137,7 +137,7 @@ void P2ElementwiseFullStokes_1_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokes_1_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -159,6 +159,7 @@ void P2ElementwiseFullStokes_1_0::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -228,7 +229,7 @@ void P2ElementwiseFullStokes_1_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseFullStokes_1_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -244,6 +245,7 @@ void P2ElementwiseFullStokes_1_0::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -314,7 +316,7 @@ void P2ElementwiseFullStokes_1_0::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokes_1_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -337,6 +339,7 @@ void P2ElementwiseFullStokes_1_0::toMatrix( const std::shared_ptr< SparseMatrixP mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -369,7 +372,7 @@ void P2ElementwiseFullStokes_1_0::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseFullStokes_1_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -386,6 +389,7 @@ void P2ElementwiseFullStokes_1_0::toMatrix( const std::shared_ptr< SparseMatrixP mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokes_1_0.hpp b/operators/full_stokes/P2ElementwiseFullStokes_1_0.hpp index 318f05d4..20104068 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_1_0.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_1_0.hpp @@ -29,12 +29,14 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -92,100 +94,119 @@ class P2ElementwiseFullStokes_1_0 : public Operator< P2Function< real_t >, P2Fun protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokes_1_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 191 189 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ElementwiseFullStokes_1_0_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_1_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 629 616 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokes_1_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_1_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 155 153 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ElementwiseFullStokes_1_0_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_1_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 529 516 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ElementwiseFullStokes_1_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokes_1_1.cpp b/operators/full_stokes/P2ElementwiseFullStokes_1_1.cpp index 7b031440..a2c8882c 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_1_1.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_1_1.cpp @@ -137,7 +137,7 @@ void P2ElementwiseFullStokes_1_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokes_1_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -159,6 +159,7 @@ void P2ElementwiseFullStokes_1_1::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -228,7 +229,7 @@ void P2ElementwiseFullStokes_1_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseFullStokes_1_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -244,6 +245,7 @@ void P2ElementwiseFullStokes_1_1::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -314,7 +316,7 @@ void P2ElementwiseFullStokes_1_1::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokes_1_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -337,6 +339,7 @@ void P2ElementwiseFullStokes_1_1::toMatrix( const std::shared_ptr< SparseMatrixP mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -369,7 +372,7 @@ void P2ElementwiseFullStokes_1_1::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseFullStokes_1_1_macro_2D( _data_dstEdge, _data_dstVertex, @@ -386,6 +389,7 @@ void P2ElementwiseFullStokes_1_1::toMatrix( const std::shared_ptr< SparseMatrixP mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -440,7 +444,7 @@ void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -460,6 +464,7 @@ void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -504,7 +509,7 @@ void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -518,6 +523,7 @@ void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/full_stokes/P2ElementwiseFullStokes_1_1.hpp b/operators/full_stokes/P2ElementwiseFullStokes_1_1.hpp index 44988bf2..820ab0cc 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_1_1.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_1_1.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -98,142 +100,173 @@ class P2ElementwiseFullStokes_1_1 : public Operator< P2Function< real_t >, P2Fun protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokes_1_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 146 144 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ElementwiseFullStokes_1_1_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_1_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 449 436 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokes_1_1_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_1_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 110 108 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ElementwiseFullStokes_1_1_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_1_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 349 336 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseFullStokes_1_1_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_1_1 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 71 63 12 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_1_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 179 156 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/full_stokes/P2ElementwiseFullStokes_1_2.cpp b/operators/full_stokes/P2ElementwiseFullStokes_1_2.cpp index bd7685ec..b2551208 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_1_2.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_1_2.cpp @@ -136,7 +136,7 @@ void P2ElementwiseFullStokes_1_2::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokes_1_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -158,6 +158,7 @@ void P2ElementwiseFullStokes_1_2::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -236,7 +237,7 @@ void P2ElementwiseFullStokes_1_2::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokes_1_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -259,6 +260,7 @@ void P2ElementwiseFullStokes_1_2::toMatrix( const std::shared_ptr< SparseMatrixP mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokes_1_2.hpp b/operators/full_stokes/P2ElementwiseFullStokes_1_2.hpp index 84e25265..e92d8145 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_1_2.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_1_2.hpp @@ -29,12 +29,14 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -92,59 +94,68 @@ class P2ElementwiseFullStokes_1_2 : public Operator< P2Function< real_t >, P2Fun protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokes_1_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 629 616 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokes_1_2_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_1_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 529 516 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ElementwiseFullStokes_1_2_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokes_2_0.cpp b/operators/full_stokes/P2ElementwiseFullStokes_2_0.cpp index b61da148..7bbbb100 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_2_0.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_2_0.cpp @@ -136,7 +136,7 @@ void P2ElementwiseFullStokes_2_0::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokes_2_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -158,6 +158,7 @@ void P2ElementwiseFullStokes_2_0::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -236,7 +237,7 @@ void P2ElementwiseFullStokes_2_0::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokes_2_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -259,6 +260,7 @@ void P2ElementwiseFullStokes_2_0::toMatrix( const std::shared_ptr< SparseMatrixP mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokes_2_0.hpp b/operators/full_stokes/P2ElementwiseFullStokes_2_0.hpp index 2eaff7a7..f0fac0c0 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_2_0.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_2_0.hpp @@ -29,12 +29,14 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -92,59 +94,68 @@ class P2ElementwiseFullStokes_2_0 : public Operator< P2Function< real_t >, P2Fun protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokes_2_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 629 616 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokes_2_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_2_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 529 516 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ElementwiseFullStokes_2_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokes_2_1.cpp b/operators/full_stokes/P2ElementwiseFullStokes_2_1.cpp index 70063545..dba8cdb8 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_2_1.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_2_1.cpp @@ -136,7 +136,7 @@ void P2ElementwiseFullStokes_2_1::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokes_2_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -158,6 +158,7 @@ void P2ElementwiseFullStokes_2_1::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -236,7 +237,7 @@ void P2ElementwiseFullStokes_2_1::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokes_2_1_macro_3D( _data_dstEdge, _data_dstVertex, @@ -259,6 +260,7 @@ void P2ElementwiseFullStokes_2_1::toMatrix( const std::shared_ptr< SparseMatrixP mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/full_stokes/P2ElementwiseFullStokes_2_1.hpp b/operators/full_stokes/P2ElementwiseFullStokes_2_1.hpp index 6903a7f9..5cb09896 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_2_1.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_2_1.hpp @@ -29,12 +29,14 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -92,59 +94,68 @@ class P2ElementwiseFullStokes_2_1 : public Operator< P2Function< real_t >, P2Fun protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokes_2_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 629 616 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokes_2_1_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_2_1 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 529 516 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ElementwiseFullStokes_2_1_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > mu; }; diff --git a/operators/full_stokes/P2ElementwiseFullStokes_2_2.cpp b/operators/full_stokes/P2ElementwiseFullStokes_2_2.cpp index 4d0715c1..1a7be77d 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_2_2.cpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_2_2.cpp @@ -136,7 +136,7 @@ void P2ElementwiseFullStokes_2_2::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseFullStokes_2_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -158,6 +158,7 @@ void P2ElementwiseFullStokes_2_2::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -236,7 +237,7 @@ void P2ElementwiseFullStokes_2_2::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseFullStokes_2_2_macro_3D( _data_dstEdge, _data_dstVertex, @@ -259,6 +260,7 @@ void P2ElementwiseFullStokes_2_2::toMatrix( const std::shared_ptr< SparseMatrixP mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -321,7 +323,7 @@ void P2ElementwiseFullStokes_2_2::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -341,6 +343,7 @@ void P2ElementwiseFullStokes_2_2::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/full_stokes/P2ElementwiseFullStokes_2_2.hpp b/operators/full_stokes/P2ElementwiseFullStokes_2_2.hpp index 521c219f..015385dc 100644 --- a/operators/full_stokes/P2ElementwiseFullStokes_2_2.hpp +++ b/operators/full_stokes/P2ElementwiseFullStokes_2_2.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -98,83 +100,98 @@ class P2ElementwiseFullStokes_2_2 : public Operator< P2Function< real_t >, P2Fun protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseFullStokes_2_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 449 436 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseFullStokes_2_2_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_2_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 349 336 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseFullStokes_2_2_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseFullStokes_2_2 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 179 156 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void + computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp index 39f53c13..4dbc0bc1 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_0_0::apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp index 1460316d..bb6db6bc 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp index 3b856520..9018004f 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_0_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_0_1::apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp index 2ef8ff10..5681bd3f 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_1_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_1_0::apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp index 6211a35b..4efc8a1d 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_1_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_1_1::apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp index 87383033..6240b9a1 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp index d1d11acf..bd795ece 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_0_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_0_0::apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp index a16c5329..340f7d64 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_0_0::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp index 8a7cf2ab..8f0f8014 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_0_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_0_1::apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp index 8baecf92..46887de2 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_0_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_0_2::apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp index 3992a8cd..8811dc99 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_1_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_1_0::apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp index f8a51d81..394fcd42 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_1_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_1_1::apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp index 4f6e4f54..9527aac2 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_1_1::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp index 4a7346e9..3db3326f 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_1_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_1_2::apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp index 98e38c67..b4f9412f 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_2_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_2_0::apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp index 77dfb79c..401e9caf 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_2_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_2_1::apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp index bca7d6ac..34bba0e9 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_2_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_2_2::apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp index fee58ccd..cc95c034 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_2_2::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_2_2::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp index 6f9d6544..100047eb 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_0::apply_P2ElementwiseFullStokes_0_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp index 7935e1ec..37a69d18 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_0::apply_P2ElementwiseFullStokes_0_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp index 7533c51a..7f40a662 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp index 36278a1b..26ae96be 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp index 477da792..146c9577 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_1::apply_P2ElementwiseFullStokes_0_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp index 83689cd7..f9f95d88 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_1::apply_P2ElementwiseFullStokes_0_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp index f91882dd..c4fda7ce 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_2::apply_P2ElementwiseFullStokes_0_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp index 197bdd29..b51195bb 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_0::apply_P2ElementwiseFullStokes_1_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp index 1a4835c0..f4aa4d2b 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_0::apply_P2ElementwiseFullStokes_1_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp index 921bacac..4af8f031 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_1::apply_P2ElementwiseFullStokes_1_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp index ca6d5eb5..91b223eb 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_1::apply_P2ElementwiseFullStokes_1_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp index 6e71a669..8d9b3df2 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp index 7931c82b..729b593c 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp index a0835f60..b6c06168 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_2::apply_P2ElementwiseFullStokes_1_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp index 2f099c32..de5791d1 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_2_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_2_0::apply_P2ElementwiseFullStokes_2_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp index 2c3c6d45..3d8e9bfb 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_2_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_2_1::apply_P2ElementwiseFullStokes_2_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp index 7d065898..aa9338f2 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_2_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_2_2::apply_P2ElementwiseFullStokes_2_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp index 926febf0..10a45775 100644 --- a/operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/full_stokes/avx/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_2_2::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_2_2::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp index fcfdb454..9b6ea9f3 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_0_0::apply_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp similarity index 98% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp index 9eb6d67f..ec6820a3 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp index 3f9a5785..ad06d887 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_0_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_0_0::toMatrix_P2ElementwiseFullStokesAnnulusMap_0_0_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp index 66828422..e0d4c035 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_0_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_0_1::apply_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp index 14582315..f942f57d 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_0_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_0_1::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_0_1::toMatrix_P2ElementwiseFullStokesAnnulusMap_0_1_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp index 9b0d1793..cad742bd 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_1_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_1_0::apply_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp index d0bebd24..1e9c19f6 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_0_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_1_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_1_0::toMatrix_P2ElementwiseFullStokesAnnulusMap_1_0_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp index 979ed833..6d453b80 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_1_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_1_1::apply_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp similarity index 98% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp index 76943090..f2039f44 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp index 2a2aa192..21eb2177 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesAnnulusMap_1_1_toMatrix_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesAnnulusMap_1_1::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseFullStokesAnnulusMap_1_1::toMatrix_P2ElementwiseFullStokesAnnulusMap_1_1_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp index 81f22134..c8a4bbe1 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_0_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_0_0::apply_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp index f3df1c09..acccfd6f 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_0_0::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp index a9d9a1f5..d5ad8eb1 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_0_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_0_0::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp index f044c0ae..04de9f72 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_0_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_0_1::apply_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp index 70b5b051..3f23db97 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_0_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_0_1::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_1_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp index 1e0cc9b7..b19089c4 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_0_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_0_2::apply_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp index b5b1070e..aa32fff8 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_0_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_0_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_0_2::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_0_2_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp index 333fd1e8..74b56c62 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_1_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_1_0::apply_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp index d72855f6..a202968d 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_1_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_1_0::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp index a57a3fd2..772db896 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_1_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_1_1::apply_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp index bb0882fa..662bd4a7 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_1_1::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp index 434add40..b8eb8580 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_1_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_1_1::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_1_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp index 8b23ea41..648491d4 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_1_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_1_2::apply_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp index fd776b0a..2cef19c3 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_1_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_1_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_1_2::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_1_2_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp index 2434f82c..b92b8c5b 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_2_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_2_0::apply_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp index 2e9034d4..9cbc1b0c 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_0_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_2_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_2_0::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp index 5755a5ae..3d4c2235 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_2_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_2_1::apply_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp index 2eb5019f..8ef485bc 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_1_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_2_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_2_1::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_1_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp index e84963ef..b9cd02ce 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_2_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_2_2::apply_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp index 520de317..65a69f3e 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_2_2::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_2_2::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp index 86ee979c..3946b762 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokesIcosahedralShellMap_2_2_toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokesIcosahedralShellMap_2_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseFullStokesIcosahedralShellMap_2_2::toMatrix_P2ElementwiseFullStokesIcosahedralShellMap_2_2_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp index 6b4d7d52..61bacabf 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_0::apply_P2ElementwiseFullStokes_0_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp index 1aff6791..4a0288c3 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_apply_P2ElementwiseFullStokes_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_0::apply_P2ElementwiseFullStokes_0_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp index c9ff205f..06a99e0d 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp index 32cc6f0d..449bf148 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_0::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_0_0_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_2D.cpp index a4527e17..95ab9e88 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_0::toMatrix_P2ElementwiseFullStokes_0_0_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_3D.cpp index 89a2608e..79fc6e1b 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_0_toMatrix_P2ElementwiseFullStokes_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_0::toMatrix_P2ElementwiseFullStokes_0_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp index 60d01a6e..d7f1666e 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_2D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_1::apply_P2ElementwiseFullStokes_0_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp index 270ad16c..bfdfc243 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_apply_P2ElementwiseFullStokes_0_1_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_1::apply_P2ElementwiseFullStokes_0_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_2D.cpp index 9c47d963..d01b881d 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_2D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_1::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_1::toMatrix_P2ElementwiseFullStokes_0_1_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_3D.cpp index 03de4a31..f787ed05 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_1_toMatrix_P2ElementwiseFullStokes_0_1_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_1::toMatrix_P2ElementwiseFullStokes_0_1_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp index 119aef26..e6eb52a2 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_apply_P2ElementwiseFullStokes_0_2_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_2::apply_P2ElementwiseFullStokes_0_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_toMatrix_P2ElementwiseFullStokes_0_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_toMatrix_P2ElementwiseFullStokes_0_2_macro_3D.cpp index 4db54ae7..9b37c459 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_0_2_toMatrix_P2ElementwiseFullStokes_0_2_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_0_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_0_2::toMatrix_P2ElementwiseFullStokes_0_2_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp index b592dcf8..7ac7272a 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_2D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_0::apply_P2ElementwiseFullStokes_1_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp index c41d6827..292a8079 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_apply_P2ElementwiseFullStokes_1_0_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_0::apply_P2ElementwiseFullStokes_1_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_2D.cpp index 3a614c4d..3f8e5d1c 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_2D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_0::toMatrix_P2ElementwiseFullStokes_1_0_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_3D.cpp index 8fe55dc1..67f69136 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_0_toMatrix_P2ElementwiseFullStokes_1_0_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_0::toMatrix_P2ElementwiseFullStokes_1_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp index 09aaf0e8..18733b28 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_1::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_1::apply_P2ElementwiseFullStokes_1_1_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp index 17f5451f..c53209c5 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_apply_P2ElementwiseFullStokes_1_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_1::apply_P2ElementwiseFullStokes_1_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp index 8bde675a..7ab2a66b 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp index d7f4d343..5083d902 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_1::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_1_1_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_2D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_2D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_2D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_2D.cpp index 350f2fee..e91096a4 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_2D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_1::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_1::toMatrix_P2ElementwiseFullStokes_1_1_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_3D.cpp index 99e17bce..461c648c 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_1_toMatrix_P2ElementwiseFullStokes_1_1_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_1::toMatrix_P2ElementwiseFullStokes_1_1_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp index 1a178923..ccf6d3d2 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_apply_P2ElementwiseFullStokes_1_2_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_2::apply_P2ElementwiseFullStokes_1_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_toMatrix_P2ElementwiseFullStokes_1_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_toMatrix_P2ElementwiseFullStokes_1_2_macro_3D.cpp index e8dc95d6..c7a48f23 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_1_2_toMatrix_P2ElementwiseFullStokes_1_2_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_1_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_1_2::toMatrix_P2ElementwiseFullStokes_1_2_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp index bc456082..1f1340c8 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_apply_P2ElementwiseFullStokes_2_0_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_2_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_2_0::apply_P2ElementwiseFullStokes_2_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_toMatrix_P2ElementwiseFullStokes_2_0_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_toMatrix_P2ElementwiseFullStokes_2_0_macro_3D.cpp index f3ef7329..8ae974b7 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_0_toMatrix_P2ElementwiseFullStokes_2_0_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_2_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_2_0::toMatrix_P2ElementwiseFullStokes_2_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp index 7246ec9f..bda6c1e4 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_apply_P2ElementwiseFullStokes_2_1_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_2_1::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_2_1::apply_P2ElementwiseFullStokes_2_1_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_toMatrix_P2ElementwiseFullStokes_2_1_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_toMatrix_P2ElementwiseFullStokes_2_1_macro_3D.cpp index 0f363c91..8626314a 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_1_toMatrix_P2ElementwiseFullStokes_2_1_macro_3D.cpp @@ -41,6 +41,10 @@ + + + + @@ -54,7 +58,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_2_1::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_2_1::toMatrix_P2ElementwiseFullStokes_2_1_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp index af6ddf52..06c0ca22 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_apply_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_apply_P2ElementwiseFullStokes_2_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_2_2::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_2_2::apply_P2ElementwiseFullStokes_2_2_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp index 927aa9d5..19d3c294 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_2_2::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_2_2::computeInverseDiagonalOperatorValues_P2ElementwiseFullStokes_2_2_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_toMatrix_macro_3D.cpp b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_toMatrix_P2ElementwiseFullStokes_2_2_macro_3D.cpp similarity index 99% rename from operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_toMatrix_macro_3D.cpp rename to operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_toMatrix_P2ElementwiseFullStokes_2_2_macro_3D.cpp index b1323923..84eeba13 100644 --- a/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_toMatrix_macro_3D.cpp +++ b/operators/full_stokes/noarch/P2ElementwiseFullStokes_2_2_toMatrix_P2ElementwiseFullStokes_2_2_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseFullStokes_2_2::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseFullStokes_2_2::toMatrix_P2ElementwiseFullStokes_2_2_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/grad_rho_by_rho_dot_u/CMakeLists.txt b/operators/grad_rho_by_rho_dot_u/CMakeLists.txt index 7660ec90..dcb22f8a 100644 --- a/operators/grad_rho_by_rho_dot_u/CMakeLists.txt +++ b/operators/grad_rho_by_rho_dot_u/CMakeLists.txt @@ -8,17 +8,45 @@ add_library( opgen-grad_rho_by_rho_dot_u P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.hpp ) -target_sources(opgen-grad_rho_by_rho_dot_u PRIVATE - - noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_macro_2D.cpp - noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_macro_2D.cpp - noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_macro_3D.cpp - noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_2D.cpp - noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_3D.cpp - noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_2D.cpp - noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_3D.cpp -) +if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY) + target_sources(opgen-grad_rho_by_rho_dot_u PRIVATE + + avx/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp + avx/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp + avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp + avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp + noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp + noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp + noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp + noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp + ) + + set_source_files_properties( + + avx/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp + avx/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp + avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp + avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp + + PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS} + ) +else() + if(HYTEG_BUILD_WITH_AVX AND NOT WALBERLA_DOUBLE_ACCURACY) + message(WARNING "AVX vectorization only available in double precision. Using scalar kernels.") + endif() + + target_sources(opgen-grad_rho_by_rho_dot_u PRIVATE + + noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp + noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp + noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp + noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp + noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp + noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp + noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp + noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp + ) +endif() if (HYTEG_BUILD_WITH_PETSC) target_link_libraries(opgen-grad_rho_by_rho_dot_u PUBLIC PETSc::PETSc) diff --git a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotU.cpp b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotU.cpp index 43cad945..130fed36 100644 --- a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotU.cpp +++ b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotU.cpp @@ -147,7 +147,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply( const P2VectorFunction< rea this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D( _data_dst, _data_rhoEdge, @@ -172,6 +172,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply( const P2VectorFunction< rea macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -225,7 +226,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply( const P2VectorFunction< rea this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D( _data_dst, _data_rhoEdge, @@ -242,6 +243,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply( const P2VectorFunction< rea macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -312,7 +314,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix( const std::shared_ptr< S this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D( _data_dst, _data_rhoEdge, @@ -338,6 +340,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix( const std::shared_ptr< S mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -372,7 +375,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix( const std::shared_ptr< S this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D( _data_dst, _data_rhoEdge, @@ -390,6 +393,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix( const std::shared_ptr< S mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotU.hpp b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotU.hpp index 6328e35f..1b858953 100644 --- a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotU.hpp +++ b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotU.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -37,6 +38,7 @@ #include "hyteg/p2functionspace/P2VectorFunction.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -50,7 +52,7 @@ namespace operatorgeneration { /// /// Weak formulation /// -/// u: trial function (space: TensorialVectorSpace(Lagrange, degree: 2)) +/// u: trial function (vectorial space: TensorialVectorSpace(Lagrange, degree: 2)) /// v: test function (space: Lagrange, degree: 1) /// rho: coefficient (space: Lagrange, degree: 2) /// @@ -78,108 +80,127 @@ class P2VectorToP1ElementwiseGradRhoByRhoDotU : public Operator< P2VectorFunctio protected: private: - /// Kernel type: apply + /// Integral: P2VectorToP1ElementwiseGradRhoByRhoDotU + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- - /// 356 380 16 0 0 0 0 0 - void apply_macro_2D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_rhoEdge, - real_t* RESTRICT _data_rhoVertex, - real_t* RESTRICT _data_src_edge_0, - real_t* RESTRICT _data_src_edge_1, - real_t* RESTRICT _data_src_vertex_0, - real_t* RESTRICT _data_src_vertex_1, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + /// 356 384 16 0 0 0 0 1 + void apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_rhoEdge, + real_t* RESTRICT _data_rhoVertex, + real_t* RESTRICT _data_src_edge_0, + real_t* RESTRICT _data_src_edge_1, + real_t* RESTRICT _data_src_vertex_0, + real_t* RESTRICT _data_src_vertex_1, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2VectorToP1ElementwiseGradRhoByRhoDotU + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- - /// 1153 1162 41 0 0 0 0 0 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_rhoEdge, - real_t* RESTRICT _data_rhoVertex, - real_t* RESTRICT _data_src_edge_0, - real_t* RESTRICT _data_src_edge_1, - real_t* RESTRICT _data_src_edge_2, - real_t* RESTRICT _data_src_vertex_0, - real_t* RESTRICT _data_src_vertex_1, - real_t* RESTRICT _data_src_vertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + /// 1153 1167 41 0 0 0 0 1 + void apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_rhoEdge, + real_t* RESTRICT _data_rhoVertex, + real_t* RESTRICT _data_src_edge_0, + real_t* RESTRICT _data_src_edge_1, + real_t* RESTRICT _data_src_edge_2, + real_t* RESTRICT _data_src_vertex_0, + real_t* RESTRICT _data_src_vertex_1, + real_t* RESTRICT _data_src_vertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2VectorToP1ElementwiseGradRhoByRhoDotU + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- - /// 320 344 16 0 0 0 0 3 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_rhoEdge, - real_t* RESTRICT _data_rhoVertex, - idx_t* RESTRICT _data_src_edge_0, - idx_t* RESTRICT _data_src_edge_1, - idx_t* RESTRICT _data_src_vertex_0, - idx_t* RESTRICT _data_src_vertex_1, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + /// 320 348 16 0 0 0 0 4 + void toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_rhoEdge, + real_t* RESTRICT _data_rhoVertex, + idx_t* RESTRICT _data_src_edge_0, + idx_t* RESTRICT _data_src_edge_1, + idx_t* RESTRICT _data_src_vertex_0, + idx_t* RESTRICT _data_src_vertex_1, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2VectorToP1ElementwiseGradRhoByRhoDotU + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- - /// 1033 1042 41 0 0 0 0 3 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_rhoEdge, - real_t* RESTRICT _data_rhoVertex, - idx_t* RESTRICT _data_src_edge_0, - idx_t* RESTRICT _data_src_edge_1, - idx_t* RESTRICT _data_src_edge_2, - idx_t* RESTRICT _data_src_vertex_0, - idx_t* RESTRICT _data_src_vertex_1, - idx_t* RESTRICT _data_src_vertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + /// 1033 1047 41 0 0 0 0 4 + void toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_rhoEdge, + real_t* RESTRICT _data_rhoVertex, + idx_t* RESTRICT _data_src_edge_0, + idx_t* RESTRICT _data_src_edge_1, + idx_t* RESTRICT _data_src_edge_2, + idx_t* RESTRICT _data_src_vertex_0, + idx_t* RESTRICT _data_src_vertex_1, + idx_t* RESTRICT _data_src_vertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > rho; }; diff --git a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.cpp b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.cpp index 25714200..88271405 100644 --- a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.cpp +++ b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.cpp @@ -141,7 +141,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply( const P2VectorFun this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D( _data_dst, _data_rhoEdge, @@ -166,6 +166,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply( const P2VectorFun refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -247,7 +248,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix( const std::sha this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D( _data_dst, _data_rhoEdge, @@ -273,6 +274,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix( const std::sha refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.hpp b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.hpp index e94fb265..64fadf55 100644 --- a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.hpp +++ b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -38,6 +39,7 @@ #include "hyteg/p2functionspace/P2VectorFunction.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -51,7 +53,7 @@ namespace operatorgeneration { /// /// Weak formulation /// -/// u: trial function (space: TensorialVectorSpace(Lagrange, degree: 2)) +/// u: trial function (vectorial space: TensorialVectorSpace(Lagrange, degree: 2)) /// v: test function (space: Lagrange, degree: 1) /// rho: coefficient (space: Lagrange, degree: 2) /// @@ -79,65 +81,74 @@ class P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap : public Operator< P2Vec protected: private: - /// Kernel type: apply + /// Integral: P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- - /// 560 732 24 12 0 0 0 0 - void apply_macro_2D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_rhoEdge, - real_t* RESTRICT _data_rhoVertex, - real_t* RESTRICT _data_src_edge_0, - real_t* RESTRICT _data_src_edge_1, - real_t* RESTRICT _data_src_vertex_0, - real_t* RESTRICT _data_src_vertex_1, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + /// 560 740 24 12 0 0 0 1 + void apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_rhoEdge, + real_t* RESTRICT _data_rhoVertex, + real_t* RESTRICT _data_src_edge_0, + real_t* RESTRICT _data_src_edge_1, + real_t* RESTRICT _data_src_vertex_0, + real_t* RESTRICT _data_src_vertex_1, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- - /// 524 696 24 12 0 0 0 3 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_rhoEdge, - real_t* RESTRICT _data_rhoVertex, - idx_t* RESTRICT _data_src_edge_0, - idx_t* RESTRICT _data_src_edge_1, - idx_t* RESTRICT _data_src_vertex_0, - idx_t* RESTRICT _data_src_vertex_1, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + /// 524 704 24 12 0 0 0 4 + void toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_rhoEdge, + real_t* RESTRICT _data_rhoVertex, + idx_t* RESTRICT _data_src_edge_0, + idx_t* RESTRICT _data_src_edge_1, + idx_t* RESTRICT _data_src_vertex_0, + idx_t* RESTRICT _data_src_vertex_1, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; P2Function< real_t > rho; }; diff --git a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.cpp b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.cpp index 2a68954c..7f7040c3 100644 --- a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.cpp +++ b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.cpp @@ -163,7 +163,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply( const P2 this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D( _data_dst, _data_rhoEdge, @@ -202,6 +202,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply( const P2 thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -294,7 +295,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix( const this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D( _data_dst, _data_rhoEdge, @@ -334,6 +335,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix( const thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.hpp b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.hpp index 0166e62c..a209370a 100644 --- a/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.hpp +++ b/operators/grad_rho_by_rho_dot_u/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -38,6 +39,7 @@ #include "hyteg/p2functionspace/P2VectorFunction.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -51,7 +53,7 @@ namespace operatorgeneration { /// /// Weak formulation /// -/// u: trial function (space: TensorialVectorSpace(Lagrange, degree: 2)) +/// u: trial function (vectorial space: TensorialVectorSpace(Lagrange, degree: 2)) /// v: test function (space: Lagrange, degree: 1) /// rho: coefficient (space: Lagrange, degree: 2) /// @@ -80,93 +82,102 @@ class P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap protected: private: - /// Kernel type: apply + /// Integral: P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- - /// 1938 2537 51 10 0 0 0 0 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_rhoEdge, - real_t* RESTRICT _data_rhoVertex, - real_t* RESTRICT _data_src_edge_0, - real_t* RESTRICT _data_src_edge_1, - real_t* RESTRICT _data_src_edge_2, - real_t* RESTRICT _data_src_vertex_0, - real_t* RESTRICT _data_src_vertex_1, - real_t* RESTRICT _data_src_vertex_2, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + /// 1938 2547 51 10 0 0 0 1 + void apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_rhoEdge, + real_t* RESTRICT _data_rhoVertex, + real_t* RESTRICT _data_src_edge_0, + real_t* RESTRICT _data_src_edge_1, + real_t* RESTRICT _data_src_edge_2, + real_t* RESTRICT _data_src_vertex_0, + real_t* RESTRICT _data_src_vertex_1, + real_t* RESTRICT _data_src_vertex_2, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- - /// 1818 2417 51 10 0 0 0 3 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_rhoEdge, - real_t* RESTRICT _data_rhoVertex, - idx_t* RESTRICT _data_src_edge_0, - idx_t* RESTRICT _data_src_edge_1, - idx_t* RESTRICT _data_src_edge_2, - idx_t* RESTRICT _data_src_vertex_0, - idx_t* RESTRICT _data_src_vertex_1, - idx_t* RESTRICT _data_src_vertex_2, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + /// 1818 2427 51 10 0 0 0 4 + void toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_rhoEdge, + real_t* RESTRICT _data_rhoVertex, + idx_t* RESTRICT _data_src_edge_0, + idx_t* RESTRICT _data_src_edge_1, + idx_t* RESTRICT _data_src_edge_2, + idx_t* RESTRICT _data_src_vertex_0, + idx_t* RESTRICT _data_src_vertex_1, + idx_t* RESTRICT _data_src_vertex_2, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > rho; }; diff --git a/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp b/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp new file mode 100644 index 00000000..1afdcebe --- /dev/null +++ b/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp @@ -0,0 +1,1071 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, real_t * RESTRICT _data_src_edge_0, real_t * RESTRICT _data_src_edge_1, real_t * RESTRICT _data_src_vertex_0, real_t * RESTRICT _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; + const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_8 = -tmp_qloop_7; + const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); + const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; + const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_11*1.0; + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0); + const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0); + const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1); + const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3); + const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4); + const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5)); + const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5)); + const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)); + const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18); + const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24); + const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3); + const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30); + const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3); + const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3)); + const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_39 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_41 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_43 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_38),tmp_qloop_40),tmp_qloop_42); + const __m256d tmp_qloop_44 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_45 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_37),tmp_qloop_44)); + const __m256d tmp_qloop_46 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_37)),_mm256_mul_pd(rho_dof_5,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_3,tmp_qloop_44)),_mm256_mul_pd(_mm256_mul_pd(rho_dof_4,tmp_qloop_44),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_45); + const __m256d tmp_qloop_47 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_44)),_mm256_mul_pd(rho_dof_4,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_3,tmp_qloop_37)),_mm256_mul_pd(_mm256_mul_pd(rho_dof_5,tmp_qloop_37),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_45); + const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_40); + const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_42); + const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_52,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_37); + const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4)); + const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21); + const __m256d abs_det_jac_blending = tmp_qloop_21; + const __m256d tmp_qloop_54 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_43),_mm256_mul_pd(rho_dof_1,tmp_qloop_49)),_mm256_mul_pd(rho_dof_2,tmp_qloop_50)),_mm256_mul_pd(rho_dof_3,tmp_qloop_38)),_mm256_mul_pd(rho_dof_4,tmp_qloop_51)),_mm256_mul_pd(rho_dof_5,tmp_qloop_53)))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)); + const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_54,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_65 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,tmp_qloop_41),tmp_qloop_54); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22); + const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)))),_mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))))); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_55); + const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_59); + const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_61); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22); + const __m256d tmp_qloop_57 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)))),_mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))))); + const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_57); + const __m256d tmp_qloop_63 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_59); + const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_61); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28); + const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11))); + const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36); + const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_56); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_56); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_56); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_56); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_56); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_56); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_58); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_58); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_58); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_58); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_58); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_58); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_60); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_60); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_60); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_52,tmp_qloop_62); + const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_60); + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_60); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_63); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_63); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_63); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_52,tmp_qloop_64); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_63); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_63); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_62); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_62); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_62); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_65); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_62); + const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_62); + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_64); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_64); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_64); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_65); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_64); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_64); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_3 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t src_dof_6 = _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t src_dof_7 = _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_8 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_9 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_10 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_11 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t rho_dof_3 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_39 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_40 = tmp_qloop_39*2.0; + const real_t tmp_qloop_41 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_42 = tmp_qloop_41*2.0; + const real_t tmp_qloop_43 = tmp_qloop_38 + tmp_qloop_40 + tmp_qloop_42 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_44 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_45 = rho_dof_0*(tmp_qloop_37 + tmp_qloop_44 - 3.0); + const real_t tmp_qloop_46 = rho_dof_1*(tmp_qloop_37 - 1.0) + rho_dof_3*tmp_qloop_44 - rho_dof_4*tmp_qloop_44 + rho_dof_5*(-tmp_qloop_44 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_45; + const real_t tmp_qloop_47 = rho_dof_2*(tmp_qloop_44 - 1.0) + rho_dof_3*tmp_qloop_37 + rho_dof_4*(-tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0) - rho_dof_5*tmp_qloop_37 + tmp_qloop_45; + const real_t tmp_qloop_49 = tmp_qloop_40 - _data_q_p_0[q]; + const real_t tmp_qloop_50 = tmp_qloop_42 - _data_q_p_1[q]; + const real_t tmp_qloop_51 = -tmp_qloop_38 + tmp_qloop_41*-4.0 + tmp_qloop_44; + const real_t tmp_qloop_52 = tmp_qloop_39*4.0; + const real_t tmp_qloop_53 = tmp_qloop_37 - tmp_qloop_38 - tmp_qloop_52; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_54 = abs_det_jac_affine_GRAY*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q]; + const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); + const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q]; + const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q]; + const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t tmp_qloop_48 = tmp_qloop_46*(jac_affine_inv_0_0_GRAY*jac_blending_inv_0_0 + jac_affine_inv_0_1_GRAY*jac_blending_inv_1_0) + tmp_qloop_47*(jac_affine_inv_1_0_GRAY*jac_blending_inv_0_0 + jac_affine_inv_1_1_GRAY*jac_blending_inv_1_0); + const real_t tmp_qloop_56 = tmp_qloop_48*tmp_qloop_55; + const real_t tmp_qloop_60 = tmp_qloop_48*tmp_qloop_59; + const real_t tmp_qloop_62 = tmp_qloop_48*tmp_qloop_61; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t tmp_qloop_57 = tmp_qloop_46*(jac_affine_inv_0_0_GRAY*jac_blending_inv_0_1 + jac_affine_inv_0_1_GRAY*jac_blending_inv_1_1) + tmp_qloop_47*(jac_affine_inv_1_0_GRAY*jac_blending_inv_0_1 + jac_affine_inv_1_1_GRAY*jac_blending_inv_1_1); + const real_t tmp_qloop_58 = tmp_qloop_55*tmp_qloop_57; + const real_t tmp_qloop_63 = tmp_qloop_57*tmp_qloop_59; + const real_t tmp_qloop_64 = tmp_qloop_57*tmp_qloop_61; + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t q_tmp_0_0 = tmp_qloop_43*tmp_qloop_56; + const real_t q_tmp_0_1 = tmp_qloop_49*tmp_qloop_56; + const real_t q_tmp_0_2 = tmp_qloop_50*tmp_qloop_56; + const real_t q_tmp_0_3 = tmp_qloop_38*tmp_qloop_56; + const real_t q_tmp_0_4 = tmp_qloop_51*tmp_qloop_56; + const real_t q_tmp_0_5 = tmp_qloop_53*tmp_qloop_56; + const real_t q_tmp_0_6 = tmp_qloop_43*tmp_qloop_58; + const real_t q_tmp_0_7 = tmp_qloop_49*tmp_qloop_58; + const real_t q_tmp_0_8 = tmp_qloop_50*tmp_qloop_58; + const real_t q_tmp_0_9 = tmp_qloop_38*tmp_qloop_58; + const real_t q_tmp_0_10 = tmp_qloop_51*tmp_qloop_58; + const real_t q_tmp_0_11 = tmp_qloop_53*tmp_qloop_58; + const real_t q_tmp_1_0 = tmp_qloop_43*tmp_qloop_60; + const real_t q_tmp_1_1 = tmp_qloop_49*tmp_qloop_60; + const real_t q_tmp_1_2 = tmp_qloop_50*tmp_qloop_60; + const real_t q_tmp_1_3 = tmp_qloop_52*tmp_qloop_62; + const real_t q_tmp_1_4 = tmp_qloop_51*tmp_qloop_60; + const real_t q_tmp_1_5 = tmp_qloop_53*tmp_qloop_60; + const real_t q_tmp_1_6 = tmp_qloop_43*tmp_qloop_63; + const real_t q_tmp_1_7 = tmp_qloop_49*tmp_qloop_63; + const real_t q_tmp_1_8 = tmp_qloop_50*tmp_qloop_63; + const real_t q_tmp_1_9 = tmp_qloop_52*tmp_qloop_64; + const real_t q_tmp_1_10 = tmp_qloop_51*tmp_qloop_63; + const real_t q_tmp_1_11 = tmp_qloop_53*tmp_qloop_63; + const real_t q_tmp_2_0 = tmp_qloop_43*tmp_qloop_62; + const real_t q_tmp_2_1 = tmp_qloop_49*tmp_qloop_62; + const real_t q_tmp_2_2 = tmp_qloop_50*tmp_qloop_62; + const real_t q_tmp_2_3 = tmp_qloop_48*tmp_qloop_65; + const real_t q_tmp_2_4 = tmp_qloop_51*tmp_qloop_62; + const real_t q_tmp_2_5 = tmp_qloop_53*tmp_qloop_62; + const real_t q_tmp_2_6 = tmp_qloop_43*tmp_qloop_64; + const real_t q_tmp_2_7 = tmp_qloop_49*tmp_qloop_64; + const real_t q_tmp_2_8 = tmp_qloop_50*tmp_qloop_64; + const real_t q_tmp_2_9 = tmp_qloop_57*tmp_qloop_65; + const real_t q_tmp_2_10 = tmp_qloop_51*tmp_qloop_64; + const real_t q_tmp_2_11 = tmp_qloop_53*tmp_qloop_64; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + } + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0); + const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0); + const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1); + const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3); + const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4); + const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5)); + const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5)); + const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)); + const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18); + const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24); + const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3); + const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30); + const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3); + const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3)); + const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_39 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_41 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_43 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_38),tmp_qloop_40),tmp_qloop_42); + const __m256d tmp_qloop_44 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_45 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_37),tmp_qloop_44)); + const __m256d tmp_qloop_46 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_37)),_mm256_mul_pd(rho_dof_5,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_3,tmp_qloop_44)),_mm256_mul_pd(_mm256_mul_pd(rho_dof_4,tmp_qloop_44),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_45); + const __m256d tmp_qloop_47 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_44)),_mm256_mul_pd(rho_dof_4,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_3,tmp_qloop_37)),_mm256_mul_pd(_mm256_mul_pd(rho_dof_5,tmp_qloop_37),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_45); + const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_40); + const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_42); + const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_52,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_37); + const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4)); + const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21); + const __m256d abs_det_jac_blending = tmp_qloop_21; + const __m256d tmp_qloop_54 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_43),_mm256_mul_pd(rho_dof_1,tmp_qloop_49)),_mm256_mul_pd(rho_dof_2,tmp_qloop_50)),_mm256_mul_pd(rho_dof_3,tmp_qloop_38)),_mm256_mul_pd(rho_dof_4,tmp_qloop_51)),_mm256_mul_pd(rho_dof_5,tmp_qloop_53)))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)); + const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_54,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_54,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_65 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,tmp_qloop_41),tmp_qloop_54); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22); + const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)))),_mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))))); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_55); + const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_59); + const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_61); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22); + const __m256d tmp_qloop_57 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)))),_mm256_mul_pd(tmp_qloop_47,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))))); + const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_57); + const __m256d tmp_qloop_63 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_59); + const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_61); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28); + const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11))); + const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36); + const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_56); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_56); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_56); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_56); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_56); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_56); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_58); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_58); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_58); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_58); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_58); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_58); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_60); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_60); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_60); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_52,tmp_qloop_62); + const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_60); + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_60); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_63); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_63); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_63); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_52,tmp_qloop_64); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_63); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_63); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_62); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_62); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_62); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_65); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_62); + const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_62); + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_64); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_64); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_64); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_65); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_64); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_53,tmp_qloop_64); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t src_dof_3 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_6 = _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_7 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_8 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t src_dof_9 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_10 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t src_dof_11 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t rho_dof_3 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_39 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_40 = tmp_qloop_39*2.0; + const real_t tmp_qloop_41 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_42 = tmp_qloop_41*2.0; + const real_t tmp_qloop_43 = tmp_qloop_38 + tmp_qloop_40 + tmp_qloop_42 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_44 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_45 = rho_dof_0*(tmp_qloop_37 + tmp_qloop_44 - 3.0); + const real_t tmp_qloop_46 = rho_dof_1*(tmp_qloop_37 - 1.0) + rho_dof_3*tmp_qloop_44 - rho_dof_4*tmp_qloop_44 + rho_dof_5*(-tmp_qloop_44 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_45; + const real_t tmp_qloop_47 = rho_dof_2*(tmp_qloop_44 - 1.0) + rho_dof_3*tmp_qloop_37 + rho_dof_4*(-tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0) - rho_dof_5*tmp_qloop_37 + tmp_qloop_45; + const real_t tmp_qloop_49 = tmp_qloop_40 - _data_q_p_0[q]; + const real_t tmp_qloop_50 = tmp_qloop_42 - _data_q_p_1[q]; + const real_t tmp_qloop_51 = -tmp_qloop_38 + tmp_qloop_41*-4.0 + tmp_qloop_44; + const real_t tmp_qloop_52 = tmp_qloop_39*4.0; + const real_t tmp_qloop_53 = tmp_qloop_37 - tmp_qloop_38 - tmp_qloop_52; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_54 = abs_det_jac_affine_BLUE*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q]; + const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); + const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q]; + const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q]; + const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t tmp_qloop_48 = tmp_qloop_46*(jac_affine_inv_0_0_BLUE*jac_blending_inv_0_0 + jac_affine_inv_0_1_BLUE*jac_blending_inv_1_0) + tmp_qloop_47*(jac_affine_inv_1_0_BLUE*jac_blending_inv_0_0 + jac_affine_inv_1_1_BLUE*jac_blending_inv_1_0); + const real_t tmp_qloop_56 = tmp_qloop_48*tmp_qloop_55; + const real_t tmp_qloop_60 = tmp_qloop_48*tmp_qloop_59; + const real_t tmp_qloop_62 = tmp_qloop_48*tmp_qloop_61; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t tmp_qloop_57 = tmp_qloop_46*(jac_affine_inv_0_0_BLUE*jac_blending_inv_0_1 + jac_affine_inv_0_1_BLUE*jac_blending_inv_1_1) + tmp_qloop_47*(jac_affine_inv_1_0_BLUE*jac_blending_inv_0_1 + jac_affine_inv_1_1_BLUE*jac_blending_inv_1_1); + const real_t tmp_qloop_58 = tmp_qloop_55*tmp_qloop_57; + const real_t tmp_qloop_63 = tmp_qloop_57*tmp_qloop_59; + const real_t tmp_qloop_64 = tmp_qloop_57*tmp_qloop_61; + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t q_tmp_0_0 = tmp_qloop_43*tmp_qloop_56; + const real_t q_tmp_0_1 = tmp_qloop_49*tmp_qloop_56; + const real_t q_tmp_0_2 = tmp_qloop_50*tmp_qloop_56; + const real_t q_tmp_0_3 = tmp_qloop_38*tmp_qloop_56; + const real_t q_tmp_0_4 = tmp_qloop_51*tmp_qloop_56; + const real_t q_tmp_0_5 = tmp_qloop_53*tmp_qloop_56; + const real_t q_tmp_0_6 = tmp_qloop_43*tmp_qloop_58; + const real_t q_tmp_0_7 = tmp_qloop_49*tmp_qloop_58; + const real_t q_tmp_0_8 = tmp_qloop_50*tmp_qloop_58; + const real_t q_tmp_0_9 = tmp_qloop_38*tmp_qloop_58; + const real_t q_tmp_0_10 = tmp_qloop_51*tmp_qloop_58; + const real_t q_tmp_0_11 = tmp_qloop_53*tmp_qloop_58; + const real_t q_tmp_1_0 = tmp_qloop_43*tmp_qloop_60; + const real_t q_tmp_1_1 = tmp_qloop_49*tmp_qloop_60; + const real_t q_tmp_1_2 = tmp_qloop_50*tmp_qloop_60; + const real_t q_tmp_1_3 = tmp_qloop_52*tmp_qloop_62; + const real_t q_tmp_1_4 = tmp_qloop_51*tmp_qloop_60; + const real_t q_tmp_1_5 = tmp_qloop_53*tmp_qloop_60; + const real_t q_tmp_1_6 = tmp_qloop_43*tmp_qloop_63; + const real_t q_tmp_1_7 = tmp_qloop_49*tmp_qloop_63; + const real_t q_tmp_1_8 = tmp_qloop_50*tmp_qloop_63; + const real_t q_tmp_1_9 = tmp_qloop_52*tmp_qloop_64; + const real_t q_tmp_1_10 = tmp_qloop_51*tmp_qloop_63; + const real_t q_tmp_1_11 = tmp_qloop_53*tmp_qloop_63; + const real_t q_tmp_2_0 = tmp_qloop_43*tmp_qloop_62; + const real_t q_tmp_2_1 = tmp_qloop_49*tmp_qloop_62; + const real_t q_tmp_2_2 = tmp_qloop_50*tmp_qloop_62; + const real_t q_tmp_2_3 = tmp_qloop_48*tmp_qloop_65; + const real_t q_tmp_2_4 = tmp_qloop_51*tmp_qloop_62; + const real_t q_tmp_2_5 = tmp_qloop_53*tmp_qloop_62; + const real_t q_tmp_2_6 = tmp_qloop_43*tmp_qloop_64; + const real_t q_tmp_2_7 = tmp_qloop_49*tmp_qloop_64; + const real_t q_tmp_2_8 = tmp_qloop_50*tmp_qloop_64; + const real_t q_tmp_2_9 = tmp_qloop_57*tmp_qloop_65; + const real_t q_tmp_2_10 = tmp_qloop_51*tmp_qloop_64; + const real_t q_tmp_2_11 = tmp_qloop_53*tmp_qloop_64; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + } + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp b/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp new file mode 100644 index 00000000..a5d573ed --- /dev/null +++ b/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp @@ -0,0 +1,7929 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, real_t * RESTRICT _data_src_edge_0, real_t * RESTRICT _data_src_edge_1, real_t * RESTRICT _data_src_edge_2, real_t * RESTRICT _data_src_vertex_0, real_t * RESTRICT _data_src_vertex_1, real_t * RESTRICT _data_src_vertex_2, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +{ + { + const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; + + const real_t _data_q_p_0 [] = {0.25, 0.16666666666666666, 0.16666666666666666, 0.5, 0.16666666666666666}; + + const real_t _data_q_p_1 [] = {0.25, 0.16666666666666666, 0.5, 0.16666666666666666, 0.16666666666666666}; + + const real_t _data_q_p_2 [] = {0.25, 0.5, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; + + const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2; + const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP; + const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP; + const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP; + const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP; + const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP; + const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP; + const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP; + const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP; + const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP; + const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP; + const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP; + const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP; + const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP; + const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP; + const real_t tmp_coords_jac_6_WHITE_UP = jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP; + const real_t tmp_coords_jac_7_WHITE_UP = 1.0 / (tmp_coords_jac_6_WHITE_UP); + const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP); + const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP); + const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP); + const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP); + const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP); + const real_t abs_det_jac_affine_WHITE_UP = abs(tmp_coords_jac_6_WHITE_UP); + const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; + const real_t tmp_qloop_1 = -rayVertex_0; + const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; + const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; + const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; + const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; + const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; + const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; + const real_t tmp_qloop_8 = -rayVertex_1; + const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; + const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; + const real_t tmp_qloop_12 = -rayVertex_2; + const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; + const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; + const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; + const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; + const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); + const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; + const real_t tmp_qloop_21 = radRayVertex - radRefVertex; + const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; + const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; + const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; + const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; + const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; + const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; + const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; + const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; + const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; + { + /* CellType.WHITE_UP */ + for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) + for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 5; q += 1) + { + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_0); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_18); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_2),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_2),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_2),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_2); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_1),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_1); + const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_15,tmp_qloop_15,tmp_qloop_15,tmp_qloop_15))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_set_pd(tmp_qloop_3,tmp_qloop_3,tmp_qloop_3,tmp_qloop_3))),_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_25); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_22); + const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(tmp_qloop_19,tmp_qloop_29),tmp_qloop_30); + const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)); + const __m256d tmp_qloop_35 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_34); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_35),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_37 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31)); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)); + const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_43); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_47); + const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),tmp_qloop_48); + const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_49); + const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_47)); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_51); + const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53)),tmp_qloop_28); + const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_54)); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)); + const __m256d tmp_qloop_57 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_56); + const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_57)); + const __m256d tmp_qloop_66 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31),tmp_qloop_31)),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_66); + const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_73 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)); + const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)),tmp_qloop_27); + const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),tmp_qloop_74); + const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_76); + const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_77); + const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_43); + const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_83 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))); + const __m256d tmp_qloop_84 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_85 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_22); + const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_43); + const __m256d tmp_qloop_87 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)),_mm256_set_pd(tmp_qloop_32,tmp_qloop_32,tmp_qloop_32,tmp_qloop_32))); + const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_43); + const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_76); + const __m256d tmp_qloop_90 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_89); + const __m256d tmp_qloop_92 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_85,tmp_qloop_89); + const __m256d tmp_qloop_94 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)); + const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_76); + const __m256d tmp_qloop_96 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))); + const __m256d tmp_qloop_97 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_76); + const __m256d tmp_qloop_98 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))); + const __m256d tmp_qloop_99 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_97); + const __m256d tmp_qloop_100 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_99); + const __m256d tmp_qloop_101 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),tmp_qloop_74); + const __m256d tmp_qloop_102 = _mm256_mul_pd(tmp_qloop_58,tmp_qloop_76); + const __m256d tmp_qloop_103 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_104 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_105 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_106 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_107 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_108 = _mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_109 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_110 = _mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_111 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_112 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_113 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_114 = _mm256_add_pd(tmp_qloop_112,tmp_qloop_113); + const __m256d tmp_qloop_115 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_104),tmp_qloop_106),tmp_qloop_108),tmp_qloop_110),tmp_qloop_114); + const __m256d tmp_qloop_116 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_117 = _mm256_add_pd(tmp_qloop_109,tmp_qloop_116); + const __m256d tmp_qloop_118 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_109),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_119 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_111),tmp_qloop_117)); + const __m256d tmp_qloop_120 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_116),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_119); + const __m256d tmp_qloop_121 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_111)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_117,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_109)),tmp_qloop_118),tmp_qloop_120); + const __m256d tmp_qloop_122 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_111); + const __m256d tmp_qloop_123 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_111),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_124 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_109)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_116,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_111)),tmp_qloop_120),tmp_qloop_123); + const __m256d tmp_qloop_125 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_116)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_109)),_mm256_mul_pd(rho_dof_5,tmp_qloop_111)),tmp_qloop_118),tmp_qloop_119),tmp_qloop_123); + const __m256d tmp_qloop_127 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_104); + const __m256d tmp_qloop_128 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_106); + const __m256d tmp_qloop_129 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_108); + const __m256d tmp_qloop_130 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_113,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_116); + const __m256d tmp_qloop_131 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_132 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_112,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_131,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_109); + const __m256d tmp_qloop_133 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_134 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_133,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111); + const __m256d tmp_qloop_158 = _mm256_mul_pd(tmp_qloop_105,tmp_qloop_111); + const __m256d jac_blending_0_0 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_38); + const __m256d jac_blending_0_1 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_44); + const __m256d jac_blending_0_2 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_50); + const __m256d jac_blending_1_0 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_52); + const __m256d jac_blending_1_1 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_55); + const __m256d tmp_qloop_63 = _mm256_mul_pd(jac_blending_0_2,jac_blending_1_1); + const __m256d jac_blending_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50); + const __m256d tmp_qloop_60 = _mm256_mul_pd(jac_blending_0_1,jac_blending_1_2); + const __m256d jac_blending_2_0 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_52); + const __m256d jac_blending_2_1 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_44); + const __m256d tmp_qloop_61 = _mm256_mul_pd(jac_blending_1_2,jac_blending_2_1); + const __m256d jac_blending_2_2 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_58); + const __m256d tmp_qloop_59 = _mm256_mul_pd(jac_blending_1_1,jac_blending_2_2); + const __m256d tmp_qloop_62 = _mm256_mul_pd(jac_blending_0_1,jac_blending_2_2); + const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_2_0,tmp_qloop_60)),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_61),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_62),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_2_0,tmp_qloop_63),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),jac_blending_2_1)); + const __m256d tmp_qloop_65 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_64); + const __m256d abs_det_jac_blending = tmp_qloop_64; + const __m256d tmp_qloop_135 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_115),_mm256_mul_pd(rho_dof_1,tmp_qloop_127)),_mm256_mul_pd(rho_dof_2,tmp_qloop_128)),_mm256_mul_pd(rho_dof_3,tmp_qloop_129)),_mm256_mul_pd(rho_dof_4,tmp_qloop_110)),_mm256_mul_pd(rho_dof_5,tmp_qloop_113)),_mm256_mul_pd(rho_dof_6,tmp_qloop_112)),_mm256_mul_pd(rho_dof_7,tmp_qloop_130)),_mm256_mul_pd(rho_dof_8,tmp_qloop_132)),_mm256_mul_pd(rho_dof_9,tmp_qloop_134)))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_WHITE_UP,abs_det_jac_affine_WHITE_UP,abs_det_jac_affine_WHITE_UP,abs_det_jac_affine_WHITE_UP)); + const __m256d tmp_qloop_136 = _mm256_mul_pd(tmp_qloop_135,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_142 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_144 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_147 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_159 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_158); + const __m256d tmp_qloop_161 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_135); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_59)); + const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(jac_blending_0_2,jac_blending_2_1))); + const __m256d jac_blending_inv_0_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_60)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_2,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_2),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_1_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_2_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_1,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_126 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP))))); + const __m256d tmp_qloop_137 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_136); + const __m256d tmp_qloop_143 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_142); + const __m256d tmp_qloop_145 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_144); + const __m256d tmp_qloop_146 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_145); + const __m256d tmp_qloop_148 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_147); + const __m256d tmp_qloop_157 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_135); + const __m256d tmp_qloop_160 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_157); + const __m256d jac_blending_inv_2_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_138 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP))))); + const __m256d tmp_qloop_139 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_138); + const __m256d tmp_qloop_149 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_142); + const __m256d tmp_qloop_150 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_144); + const __m256d tmp_qloop_151 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_150); + const __m256d tmp_qloop_152 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_147); + const __m256d tmp_qloop_162 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_161); + const __m256d jac_blending_inv_2_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_140 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP))))); + const __m256d tmp_qloop_141 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_140); + const __m256d tmp_qloop_153 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_142); + const __m256d tmp_qloop_154 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_144); + const __m256d tmp_qloop_155 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_154); + const __m256d tmp_qloop_156 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_147); + const __m256d tmp_qloop_163 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_161); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33))),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),_mm256_mul_pd(tmp_qloop_35,tmp_qloop_69)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,tmp_qloop_69),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_71)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_22,tmp_qloop_71)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_81),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52); + const __m256d hessian_blending_2_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_83),tmp_qloop_84); + const __m256d hessian_blending_0_0_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_86),tmp_qloop_84); + const __m256d hessian_blending_2_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_86),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52); + const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_87,tmp_qloop_88),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_88,tmp_qloop_92)); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53))),_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))),_mm256_mul_pd(tmp_qloop_47,tmp_qloop_79)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_79))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_82)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_1_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_87),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_1_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_86,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d hessian_blending_0_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_88,tmp_qloop_96),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50); + const __m256d hessian_blending_1_2_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_98),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_2_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_85,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_18),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_2_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_96),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_98),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50); + const __m256d hessian_blending_2_2_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_99),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_25),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_18),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_25),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_79)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))),_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_82))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_137); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_137); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_137); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_137); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_137); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_137); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_137); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_137); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_137); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_137); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_139); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_139); + const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_139); + const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_139); + const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_139); + const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_139); + const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_139); + const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_139); + const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_139); + const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_139); + const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_141); + const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_141); + const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_141); + const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_141); + const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_141); + const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_141); + const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_141); + const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_141); + const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_141); + const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_141); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_143); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_143); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_143); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_143); + const __m256d q_tmp_1_4 = tmp_qloop_146; + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_145); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_148); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_143); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_143); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_143); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_149); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_149); + const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_149); + const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_149); + const __m256d q_tmp_1_14 = tmp_qloop_151; + const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_150); + const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_152); + const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_149); + const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_149); + const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_149); + const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_153); + const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_153); + const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_153); + const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_153); + const __m256d q_tmp_1_24 = tmp_qloop_155; + const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_154); + const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_156); + const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_153); + const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_153); + const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_153); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_148); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_148); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_148); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_148); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_145); + const __m256d q_tmp_2_5 = tmp_qloop_146; + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_157,tmp_qloop_158); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_148); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_148); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_148); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_152); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_152); + const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_152); + const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_152); + const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_150); + const __m256d q_tmp_2_15 = tmp_qloop_151; + const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_159); + const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_152); + const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_152); + const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_152); + const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_156); + const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_156); + const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_156); + const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_156); + const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_154); + const __m256d q_tmp_2_25 = tmp_qloop_155; + const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_159); + const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_156); + const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_156); + const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_156); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_145); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_145); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_145); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_145); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_160); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_160); + const __m256d q_tmp_3_6 = tmp_qloop_146; + const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_145); + const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_145); + const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_145); + const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_150); + const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_150); + const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_150); + const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_150); + const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_162); + const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_162); + const __m256d q_tmp_3_16 = tmp_qloop_151; + const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_150); + const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_150); + const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_150); + const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_154); + const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_154); + const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_154); + const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_154); + const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_163); + const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_163); + const __m256d q_tmp_3_26 = tmp_qloop_155; + const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_154); + const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_154); + const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_154); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12); + q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13); + q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14); + q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15); + q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16); + q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17); + q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18); + q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19); + q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20); + q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21); + q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22); + q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23); + q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24); + q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25); + q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26); + q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27); + q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28); + q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12); + q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13); + q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14); + q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15); + q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16); + q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17); + q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18); + q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19); + q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20); + q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21); + q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22); + q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23); + q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24); + q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25); + q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26); + q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27); + q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28); + q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12); + q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13); + q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14); + q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15); + q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16); + q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17); + q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18); + q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19); + q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20); + q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21); + q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22); + q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23); + q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24); + q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25); + q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26); + q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27); + q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28); + q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6); + q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7); + q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8); + q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9); + q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10); + q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11); + q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12); + q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13); + q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14); + q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15); + q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16); + q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17); + q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18); + q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19); + q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20); + q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21); + q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22); + q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23); + q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24); + q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25); + q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26); + q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27); + q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28); + q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_7 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_17 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_27 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_0_12 = 0.0; + real_t q_acc_0_13 = 0.0; + real_t q_acc_0_14 = 0.0; + real_t q_acc_0_15 = 0.0; + real_t q_acc_0_16 = 0.0; + real_t q_acc_0_17 = 0.0; + real_t q_acc_0_18 = 0.0; + real_t q_acc_0_19 = 0.0; + real_t q_acc_0_20 = 0.0; + real_t q_acc_0_21 = 0.0; + real_t q_acc_0_22 = 0.0; + real_t q_acc_0_23 = 0.0; + real_t q_acc_0_24 = 0.0; + real_t q_acc_0_25 = 0.0; + real_t q_acc_0_26 = 0.0; + real_t q_acc_0_27 = 0.0; + real_t q_acc_0_28 = 0.0; + real_t q_acc_0_29 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_1_12 = 0.0; + real_t q_acc_1_13 = 0.0; + real_t q_acc_1_14 = 0.0; + real_t q_acc_1_15 = 0.0; + real_t q_acc_1_16 = 0.0; + real_t q_acc_1_17 = 0.0; + real_t q_acc_1_18 = 0.0; + real_t q_acc_1_19 = 0.0; + real_t q_acc_1_20 = 0.0; + real_t q_acc_1_21 = 0.0; + real_t q_acc_1_22 = 0.0; + real_t q_acc_1_23 = 0.0; + real_t q_acc_1_24 = 0.0; + real_t q_acc_1_25 = 0.0; + real_t q_acc_1_26 = 0.0; + real_t q_acc_1_27 = 0.0; + real_t q_acc_1_28 = 0.0; + real_t q_acc_1_29 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + real_t q_acc_2_12 = 0.0; + real_t q_acc_2_13 = 0.0; + real_t q_acc_2_14 = 0.0; + real_t q_acc_2_15 = 0.0; + real_t q_acc_2_16 = 0.0; + real_t q_acc_2_17 = 0.0; + real_t q_acc_2_18 = 0.0; + real_t q_acc_2_19 = 0.0; + real_t q_acc_2_20 = 0.0; + real_t q_acc_2_21 = 0.0; + real_t q_acc_2_22 = 0.0; + real_t q_acc_2_23 = 0.0; + real_t q_acc_2_24 = 0.0; + real_t q_acc_2_25 = 0.0; + real_t q_acc_2_26 = 0.0; + real_t q_acc_2_27 = 0.0; + real_t q_acc_2_28 = 0.0; + real_t q_acc_2_29 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_3_6 = 0.0; + real_t q_acc_3_7 = 0.0; + real_t q_acc_3_8 = 0.0; + real_t q_acc_3_9 = 0.0; + real_t q_acc_3_10 = 0.0; + real_t q_acc_3_11 = 0.0; + real_t q_acc_3_12 = 0.0; + real_t q_acc_3_13 = 0.0; + real_t q_acc_3_14 = 0.0; + real_t q_acc_3_15 = 0.0; + real_t q_acc_3_16 = 0.0; + real_t q_acc_3_17 = 0.0; + real_t q_acc_3_18 = 0.0; + real_t q_acc_3_19 = 0.0; + real_t q_acc_3_20 = 0.0; + real_t q_acc_3_21 = 0.0; + real_t q_acc_3_22 = 0.0; + real_t q_acc_3_23 = 0.0; + real_t q_acc_3_24 = 0.0; + real_t q_acc_3_25 = 0.0; + real_t q_acc_3_26 = 0.0; + real_t q_acc_3_27 = 0.0; + real_t q_acc_3_28 = 0.0; + real_t q_acc_3_29 = 0.0; + for (int64_t q = 0; q < 5; q += 1) + { + const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; + const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); + const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; + const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; + const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; + const real_t tmp_qloop_25 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q] + (-p_affine_0_1 + p_affine_3_1)*_data_q_p_2[q]; + const real_t tmp_qloop_26 = -tmp_qloop_25 - tmp_qloop_8; + const real_t tmp_qloop_27 = tmp_qloop_21*(tmp_qloop_10*tmp_qloop_23*tmp_qloop_9 + tmp_qloop_13*tmp_qloop_24*tmp_qloop_6 - tmp_qloop_14*tmp_qloop_24 - tmp_qloop_15*tmp_qloop_26 + tmp_qloop_2*tmp_qloop_26*tmp_qloop_3 - tmp_qloop_23*tmp_qloop_7); + const real_t tmp_qloop_28 = tmp_qloop_20 - tmp_qloop_27; + const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); + const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); + const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; + const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); + const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); + const real_t tmp_qloop_38 = tmp_qloop_37*1.0; + const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; + const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; + const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; + const real_t tmp_qloop_47 = -tmp_qloop_28; + const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; + const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; + const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; + const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; + const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; + const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; + const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; + const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; + const real_t tmp_qloop_57 = tmp_qloop_28 + tmp_qloop_56; + const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; + const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; + const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; + const real_t tmp_qloop_69 = tmp_qloop_18*2.0; + const real_t tmp_qloop_70 = -tmp_qloop_41; + const real_t tmp_qloop_71 = tmp_qloop_35*2.0; + const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; + const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; + const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; + const real_t tmp_qloop_76 = tmp_qloop_17*tmp_qloop_66; + const real_t tmp_qloop_77 = tmp_qloop_51*tmp_qloop_76; + const real_t tmp_qloop_78 = tmp_qloop_25*tmp_qloop_77; + const real_t tmp_qloop_79 = tmp_qloop_25*2.0; + const real_t tmp_qloop_80 = tmp_qloop_18*tmp_qloop_40 + tmp_qloop_33*tmp_qloop_79; + const real_t tmp_qloop_81 = tmp_qloop_25*tmp_qloop_43; + const real_t tmp_qloop_82 = tmp_qloop_22*2.0; + const real_t tmp_qloop_83 = tmp_qloop_18*tmp_qloop_72 + tmp_qloop_33*tmp_qloop_82; + const real_t tmp_qloop_84 = -tmp_qloop_22*tmp_qloop_78; + const real_t tmp_qloop_85 = tmp_qloop_18*tmp_qloop_22; + const real_t tmp_qloop_86 = tmp_qloop_22*tmp_qloop_43; + const real_t tmp_qloop_87 = tmp_qloop_21*tmp_qloop_25*tmp_qloop_32 - tmp_qloop_40*tmp_qloop_69; + const real_t tmp_qloop_88 = tmp_qloop_18*tmp_qloop_43; + const real_t tmp_qloop_89 = tmp_qloop_42*tmp_qloop_76; + const real_t tmp_qloop_90 = -tmp_qloop_25*tmp_qloop_40 - tmp_qloop_28; + const real_t tmp_qloop_91 = tmp_qloop_25*tmp_qloop_89; + const real_t tmp_qloop_92 = tmp_qloop_25*tmp_qloop_72 - tmp_qloop_40*tmp_qloop_82; + const real_t tmp_qloop_93 = tmp_qloop_85*tmp_qloop_89; + const real_t tmp_qloop_94 = -tmp_qloop_31*tmp_qloop_68; + const real_t tmp_qloop_95 = tmp_qloop_55*tmp_qloop_76; + const real_t tmp_qloop_96 = tmp_qloop_22*tmp_qloop_68 + tmp_qloop_46*tmp_qloop_69; + const real_t tmp_qloop_97 = tmp_qloop_49*tmp_qloop_76; + const real_t tmp_qloop_98 = tmp_qloop_22*tmp_qloop_40 + tmp_qloop_46*tmp_qloop_79; + const real_t tmp_qloop_99 = tmp_qloop_25*tmp_qloop_97; + const real_t tmp_qloop_100 = tmp_qloop_18*tmp_qloop_99; + const real_t tmp_qloop_101 = tmp_qloop_22*tmp_qloop_72 + tmp_qloop_56*2.0 + tmp_qloop_74; + const real_t tmp_qloop_102 = tmp_qloop_58*tmp_qloop_76; + const real_t tmp_qloop_103 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_104 = tmp_qloop_103*2.0; + const real_t tmp_qloop_105 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_106 = tmp_qloop_105*2.0; + const real_t tmp_qloop_107 = (_data_q_p_2[q]*_data_q_p_2[q]); + const real_t tmp_qloop_108 = tmp_qloop_107*2.0; + const real_t tmp_qloop_109 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_110 = tmp_qloop_109*_data_q_p_2[q]; + const real_t tmp_qloop_111 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_112 = tmp_qloop_111*_data_q_p_1[q]; + const real_t tmp_qloop_113 = tmp_qloop_111*_data_q_p_2[q]; + const real_t tmp_qloop_114 = tmp_qloop_112 + tmp_qloop_113; + const real_t tmp_qloop_115 = tmp_qloop_104 + tmp_qloop_106 + tmp_qloop_108 + tmp_qloop_110 + tmp_qloop_114 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0; + const real_t tmp_qloop_116 = 4.0*_data_q_p_2[q]; + const real_t tmp_qloop_117 = tmp_qloop_109 + tmp_qloop_116; + const real_t tmp_qloop_118 = -rho_dof_8*tmp_qloop_109; + const real_t tmp_qloop_119 = rho_dof_0*(tmp_qloop_111 + tmp_qloop_117 - 3.0); + const real_t tmp_qloop_120 = -rho_dof_7*tmp_qloop_116 + tmp_qloop_119; + const real_t tmp_qloop_121 = rho_dof_1*(tmp_qloop_111 - 1.0) + rho_dof_5*tmp_qloop_116 + rho_dof_6*tmp_qloop_109 + rho_dof_9*(-tmp_qloop_117 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_118 + tmp_qloop_120; + const real_t tmp_qloop_122 = tmp_qloop_111 - 4.0; + const real_t tmp_qloop_123 = -rho_dof_9*tmp_qloop_111; + const real_t tmp_qloop_124 = rho_dof_2*(tmp_qloop_109 - 1.0) + rho_dof_4*tmp_qloop_116 + rho_dof_6*tmp_qloop_111 + rho_dof_8*(-tmp_qloop_116 - tmp_qloop_122 - 8.0*_data_q_p_1[q]) + tmp_qloop_120 + tmp_qloop_123; + const real_t tmp_qloop_125 = rho_dof_3*(tmp_qloop_116 - 1.0) + rho_dof_4*tmp_qloop_109 + rho_dof_5*tmp_qloop_111 + rho_dof_7*(-tmp_qloop_109 - tmp_qloop_122 - 8.0*_data_q_p_2[q]) + tmp_qloop_118 + tmp_qloop_119 + tmp_qloop_123; + const real_t tmp_qloop_127 = tmp_qloop_104 - _data_q_p_0[q]; + const real_t tmp_qloop_128 = tmp_qloop_106 - _data_q_p_1[q]; + const real_t tmp_qloop_129 = tmp_qloop_108 - _data_q_p_2[q]; + const real_t tmp_qloop_130 = tmp_qloop_107*-4.0 - tmp_qloop_110 - tmp_qloop_113 + tmp_qloop_116; + const real_t tmp_qloop_131 = tmp_qloop_105*4.0; + const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; + const real_t tmp_qloop_133 = tmp_qloop_103*4.0; + const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; + const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; + const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; + const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; + const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; + const real_t jac_blending_1_0 = tmp_qloop_25*tmp_qloop_52; + const real_t jac_blending_1_1 = tmp_qloop_43*tmp_qloop_55; + const real_t tmp_qloop_63 = jac_blending_0_2*jac_blending_1_1; + const real_t jac_blending_1_2 = tmp_qloop_25*tmp_qloop_50; + const real_t tmp_qloop_60 = jac_blending_0_1*jac_blending_1_2; + const real_t jac_blending_2_0 = tmp_qloop_22*tmp_qloop_52; + const real_t jac_blending_2_1 = tmp_qloop_22*tmp_qloop_44; + const real_t tmp_qloop_61 = jac_blending_1_2*jac_blending_2_1; + const real_t jac_blending_2_2 = tmp_qloop_43*tmp_qloop_58; + const real_t tmp_qloop_59 = jac_blending_1_1*jac_blending_2_2; + const real_t tmp_qloop_62 = jac_blending_0_1*jac_blending_2_2; + const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; + const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); + const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_WHITE_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; + const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); + const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); + const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); + const real_t jac_blending_inv_1_0 = tmp_qloop_65*(-jac_blending_1_0*jac_blending_2_2 + jac_blending_1_2*jac_blending_2_0); + const real_t jac_blending_inv_1_1 = tmp_qloop_65*(jac_blending_0_0*jac_blending_2_2 - jac_blending_0_2*jac_blending_2_0); + const real_t jac_blending_inv_1_2 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_1_2 + jac_blending_0_2*jac_blending_1_0); + const real_t jac_blending_inv_2_0 = tmp_qloop_65*(jac_blending_1_0*jac_blending_2_1 - jac_blending_1_1*jac_blending_2_0); + const real_t tmp_qloop_126 = tmp_qloop_121*(jac_affine_inv_0_0_WHITE_UP*jac_blending_inv_0_0 + jac_affine_inv_0_1_WHITE_UP*jac_blending_inv_1_0 + jac_affine_inv_0_2_WHITE_UP*jac_blending_inv_2_0) + tmp_qloop_124*(jac_affine_inv_1_0_WHITE_UP*jac_blending_inv_0_0 + jac_affine_inv_1_1_WHITE_UP*jac_blending_inv_1_0 + jac_affine_inv_1_2_WHITE_UP*jac_blending_inv_2_0) + tmp_qloop_125*(jac_affine_inv_2_0_WHITE_UP*jac_blending_inv_0_0 + jac_affine_inv_2_1_WHITE_UP*jac_blending_inv_1_0 + jac_affine_inv_2_2_WHITE_UP*jac_blending_inv_2_0); + const real_t tmp_qloop_137 = tmp_qloop_126*tmp_qloop_136; + const real_t tmp_qloop_143 = tmp_qloop_126*tmp_qloop_142; + const real_t tmp_qloop_145 = tmp_qloop_126*tmp_qloop_144; + const real_t tmp_qloop_146 = tmp_qloop_112*tmp_qloop_145; + const real_t tmp_qloop_148 = tmp_qloop_126*tmp_qloop_147; + const real_t tmp_qloop_157 = tmp_qloop_126*tmp_qloop_135; + const real_t tmp_qloop_160 = tmp_qloop_107*tmp_qloop_157; + const real_t jac_blending_inv_2_1 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_2_1 + jac_blending_0_1*jac_blending_2_0); + const real_t tmp_qloop_138 = tmp_qloop_121*(jac_affine_inv_0_0_WHITE_UP*jac_blending_inv_0_1 + jac_affine_inv_0_1_WHITE_UP*jac_blending_inv_1_1 + jac_affine_inv_0_2_WHITE_UP*jac_blending_inv_2_1) + tmp_qloop_124*(jac_affine_inv_1_0_WHITE_UP*jac_blending_inv_0_1 + jac_affine_inv_1_1_WHITE_UP*jac_blending_inv_1_1 + jac_affine_inv_1_2_WHITE_UP*jac_blending_inv_2_1) + tmp_qloop_125*(jac_affine_inv_2_0_WHITE_UP*jac_blending_inv_0_1 + jac_affine_inv_2_1_WHITE_UP*jac_blending_inv_1_1 + jac_affine_inv_2_2_WHITE_UP*jac_blending_inv_2_1); + const real_t tmp_qloop_139 = tmp_qloop_136*tmp_qloop_138; + const real_t tmp_qloop_149 = tmp_qloop_138*tmp_qloop_142; + const real_t tmp_qloop_150 = tmp_qloop_138*tmp_qloop_144; + const real_t tmp_qloop_151 = tmp_qloop_112*tmp_qloop_150; + const real_t tmp_qloop_152 = tmp_qloop_138*tmp_qloop_147; + const real_t tmp_qloop_162 = tmp_qloop_138*tmp_qloop_161; + const real_t jac_blending_inv_2_2 = tmp_qloop_65*(jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0); + const real_t tmp_qloop_140 = tmp_qloop_121*(jac_affine_inv_0_0_WHITE_UP*jac_blending_inv_0_2 + jac_affine_inv_0_1_WHITE_UP*jac_blending_inv_1_2 + jac_affine_inv_0_2_WHITE_UP*jac_blending_inv_2_2) + tmp_qloop_124*(jac_affine_inv_1_0_WHITE_UP*jac_blending_inv_0_2 + jac_affine_inv_1_1_WHITE_UP*jac_blending_inv_1_2 + jac_affine_inv_1_2_WHITE_UP*jac_blending_inv_2_2) + tmp_qloop_125*(jac_affine_inv_2_0_WHITE_UP*jac_blending_inv_0_2 + jac_affine_inv_2_1_WHITE_UP*jac_blending_inv_1_2 + jac_affine_inv_2_2_WHITE_UP*jac_blending_inv_2_2); + const real_t tmp_qloop_141 = tmp_qloop_136*tmp_qloop_140; + const real_t tmp_qloop_153 = tmp_qloop_140*tmp_qloop_142; + const real_t tmp_qloop_154 = tmp_qloop_140*tmp_qloop_144; + const real_t tmp_qloop_155 = tmp_qloop_112*tmp_qloop_154; + const real_t tmp_qloop_156 = tmp_qloop_140*tmp_qloop_147; + const real_t tmp_qloop_163 = tmp_qloop_140*tmp_qloop_161; + const real_t hessian_blending_0_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_68 - tmp_qloop_28*tmp_qloop_69 + tmp_qloop_31*(tmp_qloop_33 - tmp_qloop_68) + tmp_qloop_35*tmp_qloop_69)*1.0 - tmp_qloop_18*tmp_qloop_67; + const real_t hessian_blending_1_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_40 + tmp_qloop_25*tmp_qloop_71 + tmp_qloop_70)*1.0 - tmp_qloop_25*tmp_qloop_67; + const real_t hessian_blending_2_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_72 + tmp_qloop_22*tmp_qloop_71 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_67; + const real_t hessian_blending_0_0_1 = tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_18*tmp_qloop_78; + const real_t hessian_blending_1_0_1 = -tmp_qloop_29*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_80*tmp_qloop_81; + const real_t hessian_blending_2_0_1 = tmp_qloop_81*tmp_qloop_83 + tmp_qloop_84; + const real_t hessian_blending_0_0_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_77*tmp_qloop_85; + const real_t hessian_blending_1_0_2 = tmp_qloop_80*tmp_qloop_86 + tmp_qloop_84; + const real_t hessian_blending_2_0_2 = -tmp_qloop_30*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_83*tmp_qloop_86; + const real_t hessian_blending_0_1_0 = -tmp_qloop_19*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_87*tmp_qloop_88; + const real_t hessian_blending_1_1_0 = tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_18*tmp_qloop_91; + const real_t hessian_blending_2_1_0 = tmp_qloop_88*tmp_qloop_92 - tmp_qloop_93; + const real_t hessian_blending_0_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_68 + tmp_qloop_54*tmp_qloop_69 + tmp_qloop_94)*1.0 - tmp_qloop_18*tmp_qloop_95; + const real_t hessian_blending_1_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_40 + tmp_qloop_31*(-tmp_qloop_40 + tmp_qloop_53) + tmp_qloop_47*tmp_qloop_79 + tmp_qloop_54*tmp_qloop_79)*1.0 - tmp_qloop_25*tmp_qloop_95; + const real_t hessian_blending_2_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_72 + tmp_qloop_54*tmp_qloop_82 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_95; + const real_t hessian_blending_0_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_87*1.0 - tmp_qloop_93; + const real_t hessian_blending_1_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_22*tmp_qloop_91; + const real_t hessian_blending_2_1_2 = -tmp_qloop_30*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_86*tmp_qloop_92; + const real_t hessian_blending_0_2_0 = -tmp_qloop_19*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_88*tmp_qloop_96; + const real_t hessian_blending_1_2_0 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_98*1.0; + const real_t hessian_blending_2_2_0 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*1.0 - tmp_qloop_85*tmp_qloop_97; + const real_t hessian_blending_0_2_1 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_96*1.0; + const real_t hessian_blending_1_2_1 = -tmp_qloop_29*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_81*tmp_qloop_98; + const real_t hessian_blending_2_2_1 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*1.0 - tmp_qloop_22*tmp_qloop_99; + const real_t hessian_blending_0_2_2 = -tmp_qloop_102*tmp_qloop_18 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_68 + tmp_qloop_57*tmp_qloop_69 + tmp_qloop_94)*1.0; + const real_t hessian_blending_1_2_2 = -tmp_qloop_102*tmp_qloop_25 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_40 + tmp_qloop_57*tmp_qloop_79 + tmp_qloop_70)*1.0; + const real_t hessian_blending_2_2_2 = -tmp_qloop_102*tmp_qloop_22 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_72 + tmp_qloop_31*(tmp_qloop_46 - tmp_qloop_72) + tmp_qloop_48*2.0 + tmp_qloop_57*tmp_qloop_82)*1.0; + const real_t q_tmp_0_0 = tmp_qloop_115*tmp_qloop_137; + const real_t q_tmp_0_1 = tmp_qloop_127*tmp_qloop_137; + const real_t q_tmp_0_2 = tmp_qloop_128*tmp_qloop_137; + const real_t q_tmp_0_3 = tmp_qloop_129*tmp_qloop_137; + const real_t q_tmp_0_4 = tmp_qloop_110*tmp_qloop_137; + const real_t q_tmp_0_5 = tmp_qloop_113*tmp_qloop_137; + const real_t q_tmp_0_6 = tmp_qloop_112*tmp_qloop_137; + const real_t q_tmp_0_7 = tmp_qloop_130*tmp_qloop_137; + const real_t q_tmp_0_8 = tmp_qloop_132*tmp_qloop_137; + const real_t q_tmp_0_9 = tmp_qloop_134*tmp_qloop_137; + const real_t q_tmp_0_10 = tmp_qloop_115*tmp_qloop_139; + const real_t q_tmp_0_11 = tmp_qloop_127*tmp_qloop_139; + const real_t q_tmp_0_12 = tmp_qloop_128*tmp_qloop_139; + const real_t q_tmp_0_13 = tmp_qloop_129*tmp_qloop_139; + const real_t q_tmp_0_14 = tmp_qloop_110*tmp_qloop_139; + const real_t q_tmp_0_15 = tmp_qloop_113*tmp_qloop_139; + const real_t q_tmp_0_16 = tmp_qloop_112*tmp_qloop_139; + const real_t q_tmp_0_17 = tmp_qloop_130*tmp_qloop_139; + const real_t q_tmp_0_18 = tmp_qloop_132*tmp_qloop_139; + const real_t q_tmp_0_19 = tmp_qloop_134*tmp_qloop_139; + const real_t q_tmp_0_20 = tmp_qloop_115*tmp_qloop_141; + const real_t q_tmp_0_21 = tmp_qloop_127*tmp_qloop_141; + const real_t q_tmp_0_22 = tmp_qloop_128*tmp_qloop_141; + const real_t q_tmp_0_23 = tmp_qloop_129*tmp_qloop_141; + const real_t q_tmp_0_24 = tmp_qloop_110*tmp_qloop_141; + const real_t q_tmp_0_25 = tmp_qloop_113*tmp_qloop_141; + const real_t q_tmp_0_26 = tmp_qloop_112*tmp_qloop_141; + const real_t q_tmp_0_27 = tmp_qloop_130*tmp_qloop_141; + const real_t q_tmp_0_28 = tmp_qloop_132*tmp_qloop_141; + const real_t q_tmp_0_29 = tmp_qloop_134*tmp_qloop_141; + const real_t q_tmp_1_0 = tmp_qloop_115*tmp_qloop_143; + const real_t q_tmp_1_1 = tmp_qloop_127*tmp_qloop_143; + const real_t q_tmp_1_2 = tmp_qloop_128*tmp_qloop_143; + const real_t q_tmp_1_3 = tmp_qloop_129*tmp_qloop_143; + const real_t q_tmp_1_4 = tmp_qloop_146; + const real_t q_tmp_1_5 = tmp_qloop_133*tmp_qloop_145; + const real_t q_tmp_1_6 = tmp_qloop_133*tmp_qloop_148; + const real_t q_tmp_1_7 = tmp_qloop_130*tmp_qloop_143; + const real_t q_tmp_1_8 = tmp_qloop_132*tmp_qloop_143; + const real_t q_tmp_1_9 = tmp_qloop_134*tmp_qloop_143; + const real_t q_tmp_1_10 = tmp_qloop_115*tmp_qloop_149; + const real_t q_tmp_1_11 = tmp_qloop_127*tmp_qloop_149; + const real_t q_tmp_1_12 = tmp_qloop_128*tmp_qloop_149; + const real_t q_tmp_1_13 = tmp_qloop_129*tmp_qloop_149; + const real_t q_tmp_1_14 = tmp_qloop_151; + const real_t q_tmp_1_15 = tmp_qloop_133*tmp_qloop_150; + const real_t q_tmp_1_16 = tmp_qloop_133*tmp_qloop_152; + const real_t q_tmp_1_17 = tmp_qloop_130*tmp_qloop_149; + const real_t q_tmp_1_18 = tmp_qloop_132*tmp_qloop_149; + const real_t q_tmp_1_19 = tmp_qloop_134*tmp_qloop_149; + const real_t q_tmp_1_20 = tmp_qloop_115*tmp_qloop_153; + const real_t q_tmp_1_21 = tmp_qloop_127*tmp_qloop_153; + const real_t q_tmp_1_22 = tmp_qloop_128*tmp_qloop_153; + const real_t q_tmp_1_23 = tmp_qloop_129*tmp_qloop_153; + const real_t q_tmp_1_24 = tmp_qloop_155; + const real_t q_tmp_1_25 = tmp_qloop_133*tmp_qloop_154; + const real_t q_tmp_1_26 = tmp_qloop_133*tmp_qloop_156; + const real_t q_tmp_1_27 = tmp_qloop_130*tmp_qloop_153; + const real_t q_tmp_1_28 = tmp_qloop_132*tmp_qloop_153; + const real_t q_tmp_1_29 = tmp_qloop_134*tmp_qloop_153; + const real_t q_tmp_2_0 = tmp_qloop_115*tmp_qloop_148; + const real_t q_tmp_2_1 = tmp_qloop_127*tmp_qloop_148; + const real_t q_tmp_2_2 = tmp_qloop_128*tmp_qloop_148; + const real_t q_tmp_2_3 = tmp_qloop_129*tmp_qloop_148; + const real_t q_tmp_2_4 = tmp_qloop_131*tmp_qloop_145; + const real_t q_tmp_2_5 = tmp_qloop_146; + const real_t q_tmp_2_6 = tmp_qloop_157*tmp_qloop_158; + const real_t q_tmp_2_7 = tmp_qloop_130*tmp_qloop_148; + const real_t q_tmp_2_8 = tmp_qloop_132*tmp_qloop_148; + const real_t q_tmp_2_9 = tmp_qloop_134*tmp_qloop_148; + const real_t q_tmp_2_10 = tmp_qloop_115*tmp_qloop_152; + const real_t q_tmp_2_11 = tmp_qloop_127*tmp_qloop_152; + const real_t q_tmp_2_12 = tmp_qloop_128*tmp_qloop_152; + const real_t q_tmp_2_13 = tmp_qloop_129*tmp_qloop_152; + const real_t q_tmp_2_14 = tmp_qloop_131*tmp_qloop_150; + const real_t q_tmp_2_15 = tmp_qloop_151; + const real_t q_tmp_2_16 = tmp_qloop_138*tmp_qloop_159; + const real_t q_tmp_2_17 = tmp_qloop_130*tmp_qloop_152; + const real_t q_tmp_2_18 = tmp_qloop_132*tmp_qloop_152; + const real_t q_tmp_2_19 = tmp_qloop_134*tmp_qloop_152; + const real_t q_tmp_2_20 = tmp_qloop_115*tmp_qloop_156; + const real_t q_tmp_2_21 = tmp_qloop_127*tmp_qloop_156; + const real_t q_tmp_2_22 = tmp_qloop_128*tmp_qloop_156; + const real_t q_tmp_2_23 = tmp_qloop_129*tmp_qloop_156; + const real_t q_tmp_2_24 = tmp_qloop_131*tmp_qloop_154; + const real_t q_tmp_2_25 = tmp_qloop_155; + const real_t q_tmp_2_26 = tmp_qloop_140*tmp_qloop_159; + const real_t q_tmp_2_27 = tmp_qloop_130*tmp_qloop_156; + const real_t q_tmp_2_28 = tmp_qloop_132*tmp_qloop_156; + const real_t q_tmp_2_29 = tmp_qloop_134*tmp_qloop_156; + const real_t q_tmp_3_0 = tmp_qloop_115*tmp_qloop_145; + const real_t q_tmp_3_1 = tmp_qloop_127*tmp_qloop_145; + const real_t q_tmp_3_2 = tmp_qloop_128*tmp_qloop_145; + const real_t q_tmp_3_3 = tmp_qloop_129*tmp_qloop_145; + const real_t q_tmp_3_4 = tmp_qloop_109*tmp_qloop_160; + const real_t q_tmp_3_5 = tmp_qloop_111*tmp_qloop_160; + const real_t q_tmp_3_6 = tmp_qloop_146; + const real_t q_tmp_3_7 = tmp_qloop_130*tmp_qloop_145; + const real_t q_tmp_3_8 = tmp_qloop_132*tmp_qloop_145; + const real_t q_tmp_3_9 = tmp_qloop_134*tmp_qloop_145; + const real_t q_tmp_3_10 = tmp_qloop_115*tmp_qloop_150; + const real_t q_tmp_3_11 = tmp_qloop_127*tmp_qloop_150; + const real_t q_tmp_3_12 = tmp_qloop_128*tmp_qloop_150; + const real_t q_tmp_3_13 = tmp_qloop_129*tmp_qloop_150; + const real_t q_tmp_3_14 = tmp_qloop_109*tmp_qloop_162; + const real_t q_tmp_3_15 = tmp_qloop_111*tmp_qloop_162; + const real_t q_tmp_3_16 = tmp_qloop_151; + const real_t q_tmp_3_17 = tmp_qloop_130*tmp_qloop_150; + const real_t q_tmp_3_18 = tmp_qloop_132*tmp_qloop_150; + const real_t q_tmp_3_19 = tmp_qloop_134*tmp_qloop_150; + const real_t q_tmp_3_20 = tmp_qloop_115*tmp_qloop_154; + const real_t q_tmp_3_21 = tmp_qloop_127*tmp_qloop_154; + const real_t q_tmp_3_22 = tmp_qloop_128*tmp_qloop_154; + const real_t q_tmp_3_23 = tmp_qloop_129*tmp_qloop_154; + const real_t q_tmp_3_24 = tmp_qloop_109*tmp_qloop_163; + const real_t q_tmp_3_25 = tmp_qloop_111*tmp_qloop_163; + const real_t q_tmp_3_26 = tmp_qloop_155; + const real_t q_tmp_3_27 = tmp_qloop_130*tmp_qloop_154; + const real_t q_tmp_3_28 = tmp_qloop_132*tmp_qloop_154; + const real_t q_tmp_3_29 = tmp_qloop_134*tmp_qloop_154; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_0_12 = q_acc_0_12 + q_tmp_0_12; + q_acc_0_13 = q_acc_0_13 + q_tmp_0_13; + q_acc_0_14 = q_acc_0_14 + q_tmp_0_14; + q_acc_0_15 = q_acc_0_15 + q_tmp_0_15; + q_acc_0_16 = q_acc_0_16 + q_tmp_0_16; + q_acc_0_17 = q_acc_0_17 + q_tmp_0_17; + q_acc_0_18 = q_acc_0_18 + q_tmp_0_18; + q_acc_0_19 = q_acc_0_19 + q_tmp_0_19; + q_acc_0_20 = q_acc_0_20 + q_tmp_0_20; + q_acc_0_21 = q_acc_0_21 + q_tmp_0_21; + q_acc_0_22 = q_acc_0_22 + q_tmp_0_22; + q_acc_0_23 = q_acc_0_23 + q_tmp_0_23; + q_acc_0_24 = q_acc_0_24 + q_tmp_0_24; + q_acc_0_25 = q_acc_0_25 + q_tmp_0_25; + q_acc_0_26 = q_acc_0_26 + q_tmp_0_26; + q_acc_0_27 = q_acc_0_27 + q_tmp_0_27; + q_acc_0_28 = q_acc_0_28 + q_tmp_0_28; + q_acc_0_29 = q_acc_0_29 + q_tmp_0_29; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_1_12 = q_acc_1_12 + q_tmp_1_12; + q_acc_1_13 = q_acc_1_13 + q_tmp_1_13; + q_acc_1_14 = q_acc_1_14 + q_tmp_1_14; + q_acc_1_15 = q_acc_1_15 + q_tmp_1_15; + q_acc_1_16 = q_acc_1_16 + q_tmp_1_16; + q_acc_1_17 = q_acc_1_17 + q_tmp_1_17; + q_acc_1_18 = q_acc_1_18 + q_tmp_1_18; + q_acc_1_19 = q_acc_1_19 + q_tmp_1_19; + q_acc_1_20 = q_acc_1_20 + q_tmp_1_20; + q_acc_1_21 = q_acc_1_21 + q_tmp_1_21; + q_acc_1_22 = q_acc_1_22 + q_tmp_1_22; + q_acc_1_23 = q_acc_1_23 + q_tmp_1_23; + q_acc_1_24 = q_acc_1_24 + q_tmp_1_24; + q_acc_1_25 = q_acc_1_25 + q_tmp_1_25; + q_acc_1_26 = q_acc_1_26 + q_tmp_1_26; + q_acc_1_27 = q_acc_1_27 + q_tmp_1_27; + q_acc_1_28 = q_acc_1_28 + q_tmp_1_28; + q_acc_1_29 = q_acc_1_29 + q_tmp_1_29; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + q_acc_2_12 = q_acc_2_12 + q_tmp_2_12; + q_acc_2_13 = q_acc_2_13 + q_tmp_2_13; + q_acc_2_14 = q_acc_2_14 + q_tmp_2_14; + q_acc_2_15 = q_acc_2_15 + q_tmp_2_15; + q_acc_2_16 = q_acc_2_16 + q_tmp_2_16; + q_acc_2_17 = q_acc_2_17 + q_tmp_2_17; + q_acc_2_18 = q_acc_2_18 + q_tmp_2_18; + q_acc_2_19 = q_acc_2_19 + q_tmp_2_19; + q_acc_2_20 = q_acc_2_20 + q_tmp_2_20; + q_acc_2_21 = q_acc_2_21 + q_tmp_2_21; + q_acc_2_22 = q_acc_2_22 + q_tmp_2_22; + q_acc_2_23 = q_acc_2_23 + q_tmp_2_23; + q_acc_2_24 = q_acc_2_24 + q_tmp_2_24; + q_acc_2_25 = q_acc_2_25 + q_tmp_2_25; + q_acc_2_26 = q_acc_2_26 + q_tmp_2_26; + q_acc_2_27 = q_acc_2_27 + q_tmp_2_27; + q_acc_2_28 = q_acc_2_28 + q_tmp_2_28; + q_acc_2_29 = q_acc_2_29 + q_tmp_2_29; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_3_6 = q_acc_3_6 + q_tmp_3_6; + q_acc_3_7 = q_acc_3_7 + q_tmp_3_7; + q_acc_3_8 = q_acc_3_8 + q_tmp_3_8; + q_acc_3_9 = q_acc_3_9 + q_tmp_3_9; + q_acc_3_10 = q_acc_3_10 + q_tmp_3_10; + q_acc_3_11 = q_acc_3_11 + q_tmp_3_11; + q_acc_3_12 = q_acc_3_12 + q_tmp_3_12; + q_acc_3_13 = q_acc_3_13 + q_tmp_3_13; + q_acc_3_14 = q_acc_3_14 + q_tmp_3_14; + q_acc_3_15 = q_acc_3_15 + q_tmp_3_15; + q_acc_3_16 = q_acc_3_16 + q_tmp_3_16; + q_acc_3_17 = q_acc_3_17 + q_tmp_3_17; + q_acc_3_18 = q_acc_3_18 + q_tmp_3_18; + q_acc_3_19 = q_acc_3_19 + q_tmp_3_19; + q_acc_3_20 = q_acc_3_20 + q_tmp_3_20; + q_acc_3_21 = q_acc_3_21 + q_tmp_3_21; + q_acc_3_22 = q_acc_3_22 + q_tmp_3_22; + q_acc_3_23 = q_acc_3_23 + q_tmp_3_23; + q_acc_3_24 = q_acc_3_24 + q_tmp_3_24; + q_acc_3_25 = q_acc_3_25 + q_tmp_3_25; + q_acc_3_26 = q_acc_3_26 + q_tmp_3_26; + q_acc_3_27 = q_acc_3_27 + q_tmp_3_27; + q_acc_3_28 = q_acc_3_28 + q_tmp_3_28; + q_acc_3_29 = q_acc_3_29 + q_tmp_3_29; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + } + } + } + const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; + const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; + const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; + const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN; + const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN; + const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN; + const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; + const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; + const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; + const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN; + const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN; + const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN; + const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN; + const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN; + const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN; + const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN; + const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN; + const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN; + const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN; + const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN; + const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN; + const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN; + const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN; + const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN; + const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN; + const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN; + const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_18_WHITE_DOWN = jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN; + const real_t tmp_coords_jac_19_WHITE_DOWN = 1.0 / (tmp_coords_jac_18_WHITE_DOWN); + const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN); + const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN); + const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN); + const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); + const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); + const real_t abs_det_jac_affine_WHITE_DOWN = abs(tmp_coords_jac_18_WHITE_DOWN); + { + /* CellType.WHITE_DOWN */ + for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) + for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]); + const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]); + const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]); + const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 5; q += 1) + { + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_0); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_18); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_2),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_2),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_2),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_2); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_1),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_1); + const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_15,tmp_qloop_15,tmp_qloop_15,tmp_qloop_15))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_set_pd(tmp_qloop_3,tmp_qloop_3,tmp_qloop_3,tmp_qloop_3))),_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_25); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_22); + const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(tmp_qloop_19,tmp_qloop_29),tmp_qloop_30); + const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)); + const __m256d tmp_qloop_35 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_34); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_35),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_37 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31)); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)); + const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_43); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_47); + const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),tmp_qloop_48); + const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_49); + const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_47)); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_51); + const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53)),tmp_qloop_28); + const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_54)); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)); + const __m256d tmp_qloop_57 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_56); + const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_57)); + const __m256d tmp_qloop_66 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31),tmp_qloop_31)),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_66); + const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_73 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)); + const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)),tmp_qloop_27); + const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),tmp_qloop_74); + const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_76); + const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_77); + const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_43); + const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_83 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))); + const __m256d tmp_qloop_84 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_85 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_22); + const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_43); + const __m256d tmp_qloop_87 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)),_mm256_set_pd(tmp_qloop_32,tmp_qloop_32,tmp_qloop_32,tmp_qloop_32))); + const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_43); + const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_76); + const __m256d tmp_qloop_90 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_89); + const __m256d tmp_qloop_92 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_85,tmp_qloop_89); + const __m256d tmp_qloop_94 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)); + const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_76); + const __m256d tmp_qloop_96 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))); + const __m256d tmp_qloop_97 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_76); + const __m256d tmp_qloop_98 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))); + const __m256d tmp_qloop_99 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_97); + const __m256d tmp_qloop_100 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_99); + const __m256d tmp_qloop_101 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),tmp_qloop_74); + const __m256d tmp_qloop_102 = _mm256_mul_pd(tmp_qloop_58,tmp_qloop_76); + const __m256d tmp_qloop_103 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_104 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_105 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_106 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_107 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_108 = _mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_109 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_110 = _mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_111 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_112 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_113 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_114 = _mm256_add_pd(tmp_qloop_112,tmp_qloop_113); + const __m256d tmp_qloop_115 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_104),tmp_qloop_106),tmp_qloop_108),tmp_qloop_110),tmp_qloop_114); + const __m256d tmp_qloop_116 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_117 = _mm256_add_pd(tmp_qloop_109,tmp_qloop_116); + const __m256d tmp_qloop_118 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_109),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_119 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_111),tmp_qloop_117)); + const __m256d tmp_qloop_120 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_116),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_119); + const __m256d tmp_qloop_121 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_111)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_117,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_109)),tmp_qloop_118),tmp_qloop_120); + const __m256d tmp_qloop_122 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_111); + const __m256d tmp_qloop_123 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_111),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_124 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_109)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_116,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_111)),tmp_qloop_120),tmp_qloop_123); + const __m256d tmp_qloop_125 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_116)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_109)),_mm256_mul_pd(rho_dof_5,tmp_qloop_111)),tmp_qloop_118),tmp_qloop_119),tmp_qloop_123); + const __m256d tmp_qloop_127 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_104); + const __m256d tmp_qloop_128 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_106); + const __m256d tmp_qloop_129 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_108); + const __m256d tmp_qloop_130 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_113,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_116); + const __m256d tmp_qloop_131 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_132 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_112,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_131,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_109); + const __m256d tmp_qloop_133 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_134 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_133,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111); + const __m256d tmp_qloop_158 = _mm256_mul_pd(tmp_qloop_105,tmp_qloop_111); + const __m256d jac_blending_0_0 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_38); + const __m256d jac_blending_0_1 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_44); + const __m256d jac_blending_0_2 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_50); + const __m256d jac_blending_1_0 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_52); + const __m256d jac_blending_1_1 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_55); + const __m256d tmp_qloop_63 = _mm256_mul_pd(jac_blending_0_2,jac_blending_1_1); + const __m256d jac_blending_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50); + const __m256d tmp_qloop_60 = _mm256_mul_pd(jac_blending_0_1,jac_blending_1_2); + const __m256d jac_blending_2_0 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_52); + const __m256d jac_blending_2_1 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_44); + const __m256d tmp_qloop_61 = _mm256_mul_pd(jac_blending_1_2,jac_blending_2_1); + const __m256d jac_blending_2_2 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_58); + const __m256d tmp_qloop_59 = _mm256_mul_pd(jac_blending_1_1,jac_blending_2_2); + const __m256d tmp_qloop_62 = _mm256_mul_pd(jac_blending_0_1,jac_blending_2_2); + const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_2_0,tmp_qloop_60)),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_61),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_62),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_2_0,tmp_qloop_63),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),jac_blending_2_1)); + const __m256d tmp_qloop_65 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_64); + const __m256d abs_det_jac_blending = tmp_qloop_64; + const __m256d tmp_qloop_135 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_115),_mm256_mul_pd(rho_dof_1,tmp_qloop_127)),_mm256_mul_pd(rho_dof_2,tmp_qloop_128)),_mm256_mul_pd(rho_dof_3,tmp_qloop_129)),_mm256_mul_pd(rho_dof_4,tmp_qloop_110)),_mm256_mul_pd(rho_dof_5,tmp_qloop_113)),_mm256_mul_pd(rho_dof_6,tmp_qloop_112)),_mm256_mul_pd(rho_dof_7,tmp_qloop_130)),_mm256_mul_pd(rho_dof_8,tmp_qloop_132)),_mm256_mul_pd(rho_dof_9,tmp_qloop_134)))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_WHITE_DOWN,abs_det_jac_affine_WHITE_DOWN,abs_det_jac_affine_WHITE_DOWN,abs_det_jac_affine_WHITE_DOWN)); + const __m256d tmp_qloop_136 = _mm256_mul_pd(tmp_qloop_135,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_142 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_144 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_147 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_159 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_158); + const __m256d tmp_qloop_161 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_135); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_59)); + const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(jac_blending_0_2,jac_blending_2_1))); + const __m256d jac_blending_inv_0_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_60)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_2,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_2),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_1_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_2_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_1,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_126 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN))))); + const __m256d tmp_qloop_137 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_136); + const __m256d tmp_qloop_143 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_142); + const __m256d tmp_qloop_145 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_144); + const __m256d tmp_qloop_146 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_145); + const __m256d tmp_qloop_148 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_147); + const __m256d tmp_qloop_157 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_135); + const __m256d tmp_qloop_160 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_157); + const __m256d jac_blending_inv_2_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_138 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN))))); + const __m256d tmp_qloop_139 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_138); + const __m256d tmp_qloop_149 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_142); + const __m256d tmp_qloop_150 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_144); + const __m256d tmp_qloop_151 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_150); + const __m256d tmp_qloop_152 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_147); + const __m256d tmp_qloop_162 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_161); + const __m256d jac_blending_inv_2_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_140 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN))))); + const __m256d tmp_qloop_141 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_140); + const __m256d tmp_qloop_153 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_142); + const __m256d tmp_qloop_154 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_144); + const __m256d tmp_qloop_155 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_154); + const __m256d tmp_qloop_156 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_147); + const __m256d tmp_qloop_163 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_161); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33))),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),_mm256_mul_pd(tmp_qloop_35,tmp_qloop_69)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,tmp_qloop_69),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_71)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_22,tmp_qloop_71)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_81),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52); + const __m256d hessian_blending_2_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_83),tmp_qloop_84); + const __m256d hessian_blending_0_0_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_86),tmp_qloop_84); + const __m256d hessian_blending_2_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_86),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52); + const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_87,tmp_qloop_88),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_88,tmp_qloop_92)); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53))),_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))),_mm256_mul_pd(tmp_qloop_47,tmp_qloop_79)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_79))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_82)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_1_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_87),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_1_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_86,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d hessian_blending_0_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_88,tmp_qloop_96),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50); + const __m256d hessian_blending_1_2_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_98),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_2_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_85,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_18),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_2_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_96),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_98),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50); + const __m256d hessian_blending_2_2_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_99),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_25),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_18),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_25),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_79)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))),_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_82))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_137); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_137); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_137); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_137); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_137); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_137); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_137); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_137); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_137); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_137); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_139); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_139); + const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_139); + const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_139); + const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_139); + const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_139); + const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_139); + const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_139); + const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_139); + const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_139); + const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_141); + const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_141); + const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_141); + const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_141); + const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_141); + const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_141); + const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_141); + const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_141); + const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_141); + const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_141); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_143); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_143); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_143); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_143); + const __m256d q_tmp_1_4 = tmp_qloop_146; + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_145); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_148); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_143); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_143); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_143); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_149); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_149); + const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_149); + const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_149); + const __m256d q_tmp_1_14 = tmp_qloop_151; + const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_150); + const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_152); + const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_149); + const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_149); + const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_149); + const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_153); + const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_153); + const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_153); + const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_153); + const __m256d q_tmp_1_24 = tmp_qloop_155; + const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_154); + const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_156); + const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_153); + const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_153); + const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_153); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_148); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_148); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_148); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_148); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_145); + const __m256d q_tmp_2_5 = tmp_qloop_146; + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_157,tmp_qloop_158); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_148); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_148); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_148); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_152); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_152); + const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_152); + const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_152); + const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_150); + const __m256d q_tmp_2_15 = tmp_qloop_151; + const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_159); + const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_152); + const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_152); + const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_152); + const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_156); + const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_156); + const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_156); + const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_156); + const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_154); + const __m256d q_tmp_2_25 = tmp_qloop_155; + const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_159); + const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_156); + const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_156); + const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_156); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_145); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_145); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_145); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_145); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_160); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_160); + const __m256d q_tmp_3_6 = tmp_qloop_146; + const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_145); + const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_145); + const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_145); + const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_150); + const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_150); + const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_150); + const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_150); + const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_162); + const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_162); + const __m256d q_tmp_3_16 = tmp_qloop_151; + const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_150); + const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_150); + const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_150); + const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_154); + const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_154); + const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_154); + const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_154); + const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_163); + const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_163); + const __m256d q_tmp_3_26 = tmp_qloop_155; + const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_154); + const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_154); + const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_154); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12); + q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13); + q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14); + q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15); + q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16); + q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17); + q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18); + q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19); + q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20); + q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21); + q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22); + q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23); + q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24); + q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25); + q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26); + q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27); + q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28); + q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12); + q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13); + q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14); + q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15); + q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16); + q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17); + q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18); + q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19); + q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20); + q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21); + q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22); + q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23); + q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24); + q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25); + q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26); + q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27); + q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28); + q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12); + q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13); + q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14); + q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15); + q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16); + q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17); + q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18); + q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19); + q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20); + q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21); + q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22); + q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23); + q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24); + q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25); + q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26); + q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27); + q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28); + q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6); + q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7); + q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8); + q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9); + q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10); + q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11); + q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12); + q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13); + q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14); + q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15); + q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16); + q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17); + q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18); + q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19); + q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20); + q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21); + q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22); + q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23); + q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24); + q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25); + q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26); + q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27); + q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28); + q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]; + const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_7 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_8 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_14 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]; + const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_17 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_18 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_24 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]; + const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_27 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_28 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]; + const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_0_12 = 0.0; + real_t q_acc_0_13 = 0.0; + real_t q_acc_0_14 = 0.0; + real_t q_acc_0_15 = 0.0; + real_t q_acc_0_16 = 0.0; + real_t q_acc_0_17 = 0.0; + real_t q_acc_0_18 = 0.0; + real_t q_acc_0_19 = 0.0; + real_t q_acc_0_20 = 0.0; + real_t q_acc_0_21 = 0.0; + real_t q_acc_0_22 = 0.0; + real_t q_acc_0_23 = 0.0; + real_t q_acc_0_24 = 0.0; + real_t q_acc_0_25 = 0.0; + real_t q_acc_0_26 = 0.0; + real_t q_acc_0_27 = 0.0; + real_t q_acc_0_28 = 0.0; + real_t q_acc_0_29 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_1_12 = 0.0; + real_t q_acc_1_13 = 0.0; + real_t q_acc_1_14 = 0.0; + real_t q_acc_1_15 = 0.0; + real_t q_acc_1_16 = 0.0; + real_t q_acc_1_17 = 0.0; + real_t q_acc_1_18 = 0.0; + real_t q_acc_1_19 = 0.0; + real_t q_acc_1_20 = 0.0; + real_t q_acc_1_21 = 0.0; + real_t q_acc_1_22 = 0.0; + real_t q_acc_1_23 = 0.0; + real_t q_acc_1_24 = 0.0; + real_t q_acc_1_25 = 0.0; + real_t q_acc_1_26 = 0.0; + real_t q_acc_1_27 = 0.0; + real_t q_acc_1_28 = 0.0; + real_t q_acc_1_29 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + real_t q_acc_2_12 = 0.0; + real_t q_acc_2_13 = 0.0; + real_t q_acc_2_14 = 0.0; + real_t q_acc_2_15 = 0.0; + real_t q_acc_2_16 = 0.0; + real_t q_acc_2_17 = 0.0; + real_t q_acc_2_18 = 0.0; + real_t q_acc_2_19 = 0.0; + real_t q_acc_2_20 = 0.0; + real_t q_acc_2_21 = 0.0; + real_t q_acc_2_22 = 0.0; + real_t q_acc_2_23 = 0.0; + real_t q_acc_2_24 = 0.0; + real_t q_acc_2_25 = 0.0; + real_t q_acc_2_26 = 0.0; + real_t q_acc_2_27 = 0.0; + real_t q_acc_2_28 = 0.0; + real_t q_acc_2_29 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_3_6 = 0.0; + real_t q_acc_3_7 = 0.0; + real_t q_acc_3_8 = 0.0; + real_t q_acc_3_9 = 0.0; + real_t q_acc_3_10 = 0.0; + real_t q_acc_3_11 = 0.0; + real_t q_acc_3_12 = 0.0; + real_t q_acc_3_13 = 0.0; + real_t q_acc_3_14 = 0.0; + real_t q_acc_3_15 = 0.0; + real_t q_acc_3_16 = 0.0; + real_t q_acc_3_17 = 0.0; + real_t q_acc_3_18 = 0.0; + real_t q_acc_3_19 = 0.0; + real_t q_acc_3_20 = 0.0; + real_t q_acc_3_21 = 0.0; + real_t q_acc_3_22 = 0.0; + real_t q_acc_3_23 = 0.0; + real_t q_acc_3_24 = 0.0; + real_t q_acc_3_25 = 0.0; + real_t q_acc_3_26 = 0.0; + real_t q_acc_3_27 = 0.0; + real_t q_acc_3_28 = 0.0; + real_t q_acc_3_29 = 0.0; + for (int64_t q = 0; q < 5; q += 1) + { + const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; + const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); + const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; + const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; + const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; + const real_t tmp_qloop_25 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q] + (-p_affine_0_1 + p_affine_3_1)*_data_q_p_2[q]; + const real_t tmp_qloop_26 = -tmp_qloop_25 - tmp_qloop_8; + const real_t tmp_qloop_27 = tmp_qloop_21*(tmp_qloop_10*tmp_qloop_23*tmp_qloop_9 + tmp_qloop_13*tmp_qloop_24*tmp_qloop_6 - tmp_qloop_14*tmp_qloop_24 - tmp_qloop_15*tmp_qloop_26 + tmp_qloop_2*tmp_qloop_26*tmp_qloop_3 - tmp_qloop_23*tmp_qloop_7); + const real_t tmp_qloop_28 = tmp_qloop_20 - tmp_qloop_27; + const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); + const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); + const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; + const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); + const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); + const real_t tmp_qloop_38 = tmp_qloop_37*1.0; + const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; + const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; + const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; + const real_t tmp_qloop_47 = -tmp_qloop_28; + const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; + const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; + const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; + const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; + const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; + const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; + const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; + const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; + const real_t tmp_qloop_57 = tmp_qloop_28 + tmp_qloop_56; + const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; + const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; + const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; + const real_t tmp_qloop_69 = tmp_qloop_18*2.0; + const real_t tmp_qloop_70 = -tmp_qloop_41; + const real_t tmp_qloop_71 = tmp_qloop_35*2.0; + const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; + const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; + const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; + const real_t tmp_qloop_76 = tmp_qloop_17*tmp_qloop_66; + const real_t tmp_qloop_77 = tmp_qloop_51*tmp_qloop_76; + const real_t tmp_qloop_78 = tmp_qloop_25*tmp_qloop_77; + const real_t tmp_qloop_79 = tmp_qloop_25*2.0; + const real_t tmp_qloop_80 = tmp_qloop_18*tmp_qloop_40 + tmp_qloop_33*tmp_qloop_79; + const real_t tmp_qloop_81 = tmp_qloop_25*tmp_qloop_43; + const real_t tmp_qloop_82 = tmp_qloop_22*2.0; + const real_t tmp_qloop_83 = tmp_qloop_18*tmp_qloop_72 + tmp_qloop_33*tmp_qloop_82; + const real_t tmp_qloop_84 = -tmp_qloop_22*tmp_qloop_78; + const real_t tmp_qloop_85 = tmp_qloop_18*tmp_qloop_22; + const real_t tmp_qloop_86 = tmp_qloop_22*tmp_qloop_43; + const real_t tmp_qloop_87 = tmp_qloop_21*tmp_qloop_25*tmp_qloop_32 - tmp_qloop_40*tmp_qloop_69; + const real_t tmp_qloop_88 = tmp_qloop_18*tmp_qloop_43; + const real_t tmp_qloop_89 = tmp_qloop_42*tmp_qloop_76; + const real_t tmp_qloop_90 = -tmp_qloop_25*tmp_qloop_40 - tmp_qloop_28; + const real_t tmp_qloop_91 = tmp_qloop_25*tmp_qloop_89; + const real_t tmp_qloop_92 = tmp_qloop_25*tmp_qloop_72 - tmp_qloop_40*tmp_qloop_82; + const real_t tmp_qloop_93 = tmp_qloop_85*tmp_qloop_89; + const real_t tmp_qloop_94 = -tmp_qloop_31*tmp_qloop_68; + const real_t tmp_qloop_95 = tmp_qloop_55*tmp_qloop_76; + const real_t tmp_qloop_96 = tmp_qloop_22*tmp_qloop_68 + tmp_qloop_46*tmp_qloop_69; + const real_t tmp_qloop_97 = tmp_qloop_49*tmp_qloop_76; + const real_t tmp_qloop_98 = tmp_qloop_22*tmp_qloop_40 + tmp_qloop_46*tmp_qloop_79; + const real_t tmp_qloop_99 = tmp_qloop_25*tmp_qloop_97; + const real_t tmp_qloop_100 = tmp_qloop_18*tmp_qloop_99; + const real_t tmp_qloop_101 = tmp_qloop_22*tmp_qloop_72 + tmp_qloop_56*2.0 + tmp_qloop_74; + const real_t tmp_qloop_102 = tmp_qloop_58*tmp_qloop_76; + const real_t tmp_qloop_103 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_104 = tmp_qloop_103*2.0; + const real_t tmp_qloop_105 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_106 = tmp_qloop_105*2.0; + const real_t tmp_qloop_107 = (_data_q_p_2[q]*_data_q_p_2[q]); + const real_t tmp_qloop_108 = tmp_qloop_107*2.0; + const real_t tmp_qloop_109 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_110 = tmp_qloop_109*_data_q_p_2[q]; + const real_t tmp_qloop_111 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_112 = tmp_qloop_111*_data_q_p_1[q]; + const real_t tmp_qloop_113 = tmp_qloop_111*_data_q_p_2[q]; + const real_t tmp_qloop_114 = tmp_qloop_112 + tmp_qloop_113; + const real_t tmp_qloop_115 = tmp_qloop_104 + tmp_qloop_106 + tmp_qloop_108 + tmp_qloop_110 + tmp_qloop_114 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0; + const real_t tmp_qloop_116 = 4.0*_data_q_p_2[q]; + const real_t tmp_qloop_117 = tmp_qloop_109 + tmp_qloop_116; + const real_t tmp_qloop_118 = -rho_dof_8*tmp_qloop_109; + const real_t tmp_qloop_119 = rho_dof_0*(tmp_qloop_111 + tmp_qloop_117 - 3.0); + const real_t tmp_qloop_120 = -rho_dof_7*tmp_qloop_116 + tmp_qloop_119; + const real_t tmp_qloop_121 = rho_dof_1*(tmp_qloop_111 - 1.0) + rho_dof_5*tmp_qloop_116 + rho_dof_6*tmp_qloop_109 + rho_dof_9*(-tmp_qloop_117 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_118 + tmp_qloop_120; + const real_t tmp_qloop_122 = tmp_qloop_111 - 4.0; + const real_t tmp_qloop_123 = -rho_dof_9*tmp_qloop_111; + const real_t tmp_qloop_124 = rho_dof_2*(tmp_qloop_109 - 1.0) + rho_dof_4*tmp_qloop_116 + rho_dof_6*tmp_qloop_111 + rho_dof_8*(-tmp_qloop_116 - tmp_qloop_122 - 8.0*_data_q_p_1[q]) + tmp_qloop_120 + tmp_qloop_123; + const real_t tmp_qloop_125 = rho_dof_3*(tmp_qloop_116 - 1.0) + rho_dof_4*tmp_qloop_109 + rho_dof_5*tmp_qloop_111 + rho_dof_7*(-tmp_qloop_109 - tmp_qloop_122 - 8.0*_data_q_p_2[q]) + tmp_qloop_118 + tmp_qloop_119 + tmp_qloop_123; + const real_t tmp_qloop_127 = tmp_qloop_104 - _data_q_p_0[q]; + const real_t tmp_qloop_128 = tmp_qloop_106 - _data_q_p_1[q]; + const real_t tmp_qloop_129 = tmp_qloop_108 - _data_q_p_2[q]; + const real_t tmp_qloop_130 = tmp_qloop_107*-4.0 - tmp_qloop_110 - tmp_qloop_113 + tmp_qloop_116; + const real_t tmp_qloop_131 = tmp_qloop_105*4.0; + const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; + const real_t tmp_qloop_133 = tmp_qloop_103*4.0; + const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; + const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; + const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; + const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; + const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; + const real_t jac_blending_1_0 = tmp_qloop_25*tmp_qloop_52; + const real_t jac_blending_1_1 = tmp_qloop_43*tmp_qloop_55; + const real_t tmp_qloop_63 = jac_blending_0_2*jac_blending_1_1; + const real_t jac_blending_1_2 = tmp_qloop_25*tmp_qloop_50; + const real_t tmp_qloop_60 = jac_blending_0_1*jac_blending_1_2; + const real_t jac_blending_2_0 = tmp_qloop_22*tmp_qloop_52; + const real_t jac_blending_2_1 = tmp_qloop_22*tmp_qloop_44; + const real_t tmp_qloop_61 = jac_blending_1_2*jac_blending_2_1; + const real_t jac_blending_2_2 = tmp_qloop_43*tmp_qloop_58; + const real_t tmp_qloop_59 = jac_blending_1_1*jac_blending_2_2; + const real_t tmp_qloop_62 = jac_blending_0_1*jac_blending_2_2; + const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; + const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); + const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_WHITE_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; + const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); + const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); + const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); + const real_t jac_blending_inv_1_0 = tmp_qloop_65*(-jac_blending_1_0*jac_blending_2_2 + jac_blending_1_2*jac_blending_2_0); + const real_t jac_blending_inv_1_1 = tmp_qloop_65*(jac_blending_0_0*jac_blending_2_2 - jac_blending_0_2*jac_blending_2_0); + const real_t jac_blending_inv_1_2 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_1_2 + jac_blending_0_2*jac_blending_1_0); + const real_t jac_blending_inv_2_0 = tmp_qloop_65*(jac_blending_1_0*jac_blending_2_1 - jac_blending_1_1*jac_blending_2_0); + const real_t tmp_qloop_126 = tmp_qloop_121*(jac_affine_inv_0_0_WHITE_DOWN*jac_blending_inv_0_0 + jac_affine_inv_0_1_WHITE_DOWN*jac_blending_inv_1_0 + jac_affine_inv_0_2_WHITE_DOWN*jac_blending_inv_2_0) + tmp_qloop_124*(jac_affine_inv_1_0_WHITE_DOWN*jac_blending_inv_0_0 + jac_affine_inv_1_1_WHITE_DOWN*jac_blending_inv_1_0 + jac_affine_inv_1_2_WHITE_DOWN*jac_blending_inv_2_0) + tmp_qloop_125*(jac_affine_inv_2_0_WHITE_DOWN*jac_blending_inv_0_0 + jac_affine_inv_2_1_WHITE_DOWN*jac_blending_inv_1_0 + jac_affine_inv_2_2_WHITE_DOWN*jac_blending_inv_2_0); + const real_t tmp_qloop_137 = tmp_qloop_126*tmp_qloop_136; + const real_t tmp_qloop_143 = tmp_qloop_126*tmp_qloop_142; + const real_t tmp_qloop_145 = tmp_qloop_126*tmp_qloop_144; + const real_t tmp_qloop_146 = tmp_qloop_112*tmp_qloop_145; + const real_t tmp_qloop_148 = tmp_qloop_126*tmp_qloop_147; + const real_t tmp_qloop_157 = tmp_qloop_126*tmp_qloop_135; + const real_t tmp_qloop_160 = tmp_qloop_107*tmp_qloop_157; + const real_t jac_blending_inv_2_1 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_2_1 + jac_blending_0_1*jac_blending_2_0); + const real_t tmp_qloop_138 = tmp_qloop_121*(jac_affine_inv_0_0_WHITE_DOWN*jac_blending_inv_0_1 + jac_affine_inv_0_1_WHITE_DOWN*jac_blending_inv_1_1 + jac_affine_inv_0_2_WHITE_DOWN*jac_blending_inv_2_1) + tmp_qloop_124*(jac_affine_inv_1_0_WHITE_DOWN*jac_blending_inv_0_1 + jac_affine_inv_1_1_WHITE_DOWN*jac_blending_inv_1_1 + jac_affine_inv_1_2_WHITE_DOWN*jac_blending_inv_2_1) + tmp_qloop_125*(jac_affine_inv_2_0_WHITE_DOWN*jac_blending_inv_0_1 + jac_affine_inv_2_1_WHITE_DOWN*jac_blending_inv_1_1 + jac_affine_inv_2_2_WHITE_DOWN*jac_blending_inv_2_1); + const real_t tmp_qloop_139 = tmp_qloop_136*tmp_qloop_138; + const real_t tmp_qloop_149 = tmp_qloop_138*tmp_qloop_142; + const real_t tmp_qloop_150 = tmp_qloop_138*tmp_qloop_144; + const real_t tmp_qloop_151 = tmp_qloop_112*tmp_qloop_150; + const real_t tmp_qloop_152 = tmp_qloop_138*tmp_qloop_147; + const real_t tmp_qloop_162 = tmp_qloop_138*tmp_qloop_161; + const real_t jac_blending_inv_2_2 = tmp_qloop_65*(jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0); + const real_t tmp_qloop_140 = tmp_qloop_121*(jac_affine_inv_0_0_WHITE_DOWN*jac_blending_inv_0_2 + jac_affine_inv_0_1_WHITE_DOWN*jac_blending_inv_1_2 + jac_affine_inv_0_2_WHITE_DOWN*jac_blending_inv_2_2) + tmp_qloop_124*(jac_affine_inv_1_0_WHITE_DOWN*jac_blending_inv_0_2 + jac_affine_inv_1_1_WHITE_DOWN*jac_blending_inv_1_2 + jac_affine_inv_1_2_WHITE_DOWN*jac_blending_inv_2_2) + tmp_qloop_125*(jac_affine_inv_2_0_WHITE_DOWN*jac_blending_inv_0_2 + jac_affine_inv_2_1_WHITE_DOWN*jac_blending_inv_1_2 + jac_affine_inv_2_2_WHITE_DOWN*jac_blending_inv_2_2); + const real_t tmp_qloop_141 = tmp_qloop_136*tmp_qloop_140; + const real_t tmp_qloop_153 = tmp_qloop_140*tmp_qloop_142; + const real_t tmp_qloop_154 = tmp_qloop_140*tmp_qloop_144; + const real_t tmp_qloop_155 = tmp_qloop_112*tmp_qloop_154; + const real_t tmp_qloop_156 = tmp_qloop_140*tmp_qloop_147; + const real_t tmp_qloop_163 = tmp_qloop_140*tmp_qloop_161; + const real_t hessian_blending_0_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_68 - tmp_qloop_28*tmp_qloop_69 + tmp_qloop_31*(tmp_qloop_33 - tmp_qloop_68) + tmp_qloop_35*tmp_qloop_69)*1.0 - tmp_qloop_18*tmp_qloop_67; + const real_t hessian_blending_1_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_40 + tmp_qloop_25*tmp_qloop_71 + tmp_qloop_70)*1.0 - tmp_qloop_25*tmp_qloop_67; + const real_t hessian_blending_2_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_72 + tmp_qloop_22*tmp_qloop_71 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_67; + const real_t hessian_blending_0_0_1 = tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_18*tmp_qloop_78; + const real_t hessian_blending_1_0_1 = -tmp_qloop_29*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_80*tmp_qloop_81; + const real_t hessian_blending_2_0_1 = tmp_qloop_81*tmp_qloop_83 + tmp_qloop_84; + const real_t hessian_blending_0_0_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_77*tmp_qloop_85; + const real_t hessian_blending_1_0_2 = tmp_qloop_80*tmp_qloop_86 + tmp_qloop_84; + const real_t hessian_blending_2_0_2 = -tmp_qloop_30*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_83*tmp_qloop_86; + const real_t hessian_blending_0_1_0 = -tmp_qloop_19*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_87*tmp_qloop_88; + const real_t hessian_blending_1_1_0 = tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_18*tmp_qloop_91; + const real_t hessian_blending_2_1_0 = tmp_qloop_88*tmp_qloop_92 - tmp_qloop_93; + const real_t hessian_blending_0_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_68 + tmp_qloop_54*tmp_qloop_69 + tmp_qloop_94)*1.0 - tmp_qloop_18*tmp_qloop_95; + const real_t hessian_blending_1_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_40 + tmp_qloop_31*(-tmp_qloop_40 + tmp_qloop_53) + tmp_qloop_47*tmp_qloop_79 + tmp_qloop_54*tmp_qloop_79)*1.0 - tmp_qloop_25*tmp_qloop_95; + const real_t hessian_blending_2_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_72 + tmp_qloop_54*tmp_qloop_82 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_95; + const real_t hessian_blending_0_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_87*1.0 - tmp_qloop_93; + const real_t hessian_blending_1_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_22*tmp_qloop_91; + const real_t hessian_blending_2_1_2 = -tmp_qloop_30*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_86*tmp_qloop_92; + const real_t hessian_blending_0_2_0 = -tmp_qloop_19*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_88*tmp_qloop_96; + const real_t hessian_blending_1_2_0 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_98*1.0; + const real_t hessian_blending_2_2_0 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*1.0 - tmp_qloop_85*tmp_qloop_97; + const real_t hessian_blending_0_2_1 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_96*1.0; + const real_t hessian_blending_1_2_1 = -tmp_qloop_29*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_81*tmp_qloop_98; + const real_t hessian_blending_2_2_1 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*1.0 - tmp_qloop_22*tmp_qloop_99; + const real_t hessian_blending_0_2_2 = -tmp_qloop_102*tmp_qloop_18 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_68 + tmp_qloop_57*tmp_qloop_69 + tmp_qloop_94)*1.0; + const real_t hessian_blending_1_2_2 = -tmp_qloop_102*tmp_qloop_25 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_40 + tmp_qloop_57*tmp_qloop_79 + tmp_qloop_70)*1.0; + const real_t hessian_blending_2_2_2 = -tmp_qloop_102*tmp_qloop_22 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_72 + tmp_qloop_31*(tmp_qloop_46 - tmp_qloop_72) + tmp_qloop_48*2.0 + tmp_qloop_57*tmp_qloop_82)*1.0; + const real_t q_tmp_0_0 = tmp_qloop_115*tmp_qloop_137; + const real_t q_tmp_0_1 = tmp_qloop_127*tmp_qloop_137; + const real_t q_tmp_0_2 = tmp_qloop_128*tmp_qloop_137; + const real_t q_tmp_0_3 = tmp_qloop_129*tmp_qloop_137; + const real_t q_tmp_0_4 = tmp_qloop_110*tmp_qloop_137; + const real_t q_tmp_0_5 = tmp_qloop_113*tmp_qloop_137; + const real_t q_tmp_0_6 = tmp_qloop_112*tmp_qloop_137; + const real_t q_tmp_0_7 = tmp_qloop_130*tmp_qloop_137; + const real_t q_tmp_0_8 = tmp_qloop_132*tmp_qloop_137; + const real_t q_tmp_0_9 = tmp_qloop_134*tmp_qloop_137; + const real_t q_tmp_0_10 = tmp_qloop_115*tmp_qloop_139; + const real_t q_tmp_0_11 = tmp_qloop_127*tmp_qloop_139; + const real_t q_tmp_0_12 = tmp_qloop_128*tmp_qloop_139; + const real_t q_tmp_0_13 = tmp_qloop_129*tmp_qloop_139; + const real_t q_tmp_0_14 = tmp_qloop_110*tmp_qloop_139; + const real_t q_tmp_0_15 = tmp_qloop_113*tmp_qloop_139; + const real_t q_tmp_0_16 = tmp_qloop_112*tmp_qloop_139; + const real_t q_tmp_0_17 = tmp_qloop_130*tmp_qloop_139; + const real_t q_tmp_0_18 = tmp_qloop_132*tmp_qloop_139; + const real_t q_tmp_0_19 = tmp_qloop_134*tmp_qloop_139; + const real_t q_tmp_0_20 = tmp_qloop_115*tmp_qloop_141; + const real_t q_tmp_0_21 = tmp_qloop_127*tmp_qloop_141; + const real_t q_tmp_0_22 = tmp_qloop_128*tmp_qloop_141; + const real_t q_tmp_0_23 = tmp_qloop_129*tmp_qloop_141; + const real_t q_tmp_0_24 = tmp_qloop_110*tmp_qloop_141; + const real_t q_tmp_0_25 = tmp_qloop_113*tmp_qloop_141; + const real_t q_tmp_0_26 = tmp_qloop_112*tmp_qloop_141; + const real_t q_tmp_0_27 = tmp_qloop_130*tmp_qloop_141; + const real_t q_tmp_0_28 = tmp_qloop_132*tmp_qloop_141; + const real_t q_tmp_0_29 = tmp_qloop_134*tmp_qloop_141; + const real_t q_tmp_1_0 = tmp_qloop_115*tmp_qloop_143; + const real_t q_tmp_1_1 = tmp_qloop_127*tmp_qloop_143; + const real_t q_tmp_1_2 = tmp_qloop_128*tmp_qloop_143; + const real_t q_tmp_1_3 = tmp_qloop_129*tmp_qloop_143; + const real_t q_tmp_1_4 = tmp_qloop_146; + const real_t q_tmp_1_5 = tmp_qloop_133*tmp_qloop_145; + const real_t q_tmp_1_6 = tmp_qloop_133*tmp_qloop_148; + const real_t q_tmp_1_7 = tmp_qloop_130*tmp_qloop_143; + const real_t q_tmp_1_8 = tmp_qloop_132*tmp_qloop_143; + const real_t q_tmp_1_9 = tmp_qloop_134*tmp_qloop_143; + const real_t q_tmp_1_10 = tmp_qloop_115*tmp_qloop_149; + const real_t q_tmp_1_11 = tmp_qloop_127*tmp_qloop_149; + const real_t q_tmp_1_12 = tmp_qloop_128*tmp_qloop_149; + const real_t q_tmp_1_13 = tmp_qloop_129*tmp_qloop_149; + const real_t q_tmp_1_14 = tmp_qloop_151; + const real_t q_tmp_1_15 = tmp_qloop_133*tmp_qloop_150; + const real_t q_tmp_1_16 = tmp_qloop_133*tmp_qloop_152; + const real_t q_tmp_1_17 = tmp_qloop_130*tmp_qloop_149; + const real_t q_tmp_1_18 = tmp_qloop_132*tmp_qloop_149; + const real_t q_tmp_1_19 = tmp_qloop_134*tmp_qloop_149; + const real_t q_tmp_1_20 = tmp_qloop_115*tmp_qloop_153; + const real_t q_tmp_1_21 = tmp_qloop_127*tmp_qloop_153; + const real_t q_tmp_1_22 = tmp_qloop_128*tmp_qloop_153; + const real_t q_tmp_1_23 = tmp_qloop_129*tmp_qloop_153; + const real_t q_tmp_1_24 = tmp_qloop_155; + const real_t q_tmp_1_25 = tmp_qloop_133*tmp_qloop_154; + const real_t q_tmp_1_26 = tmp_qloop_133*tmp_qloop_156; + const real_t q_tmp_1_27 = tmp_qloop_130*tmp_qloop_153; + const real_t q_tmp_1_28 = tmp_qloop_132*tmp_qloop_153; + const real_t q_tmp_1_29 = tmp_qloop_134*tmp_qloop_153; + const real_t q_tmp_2_0 = tmp_qloop_115*tmp_qloop_148; + const real_t q_tmp_2_1 = tmp_qloop_127*tmp_qloop_148; + const real_t q_tmp_2_2 = tmp_qloop_128*tmp_qloop_148; + const real_t q_tmp_2_3 = tmp_qloop_129*tmp_qloop_148; + const real_t q_tmp_2_4 = tmp_qloop_131*tmp_qloop_145; + const real_t q_tmp_2_5 = tmp_qloop_146; + const real_t q_tmp_2_6 = tmp_qloop_157*tmp_qloop_158; + const real_t q_tmp_2_7 = tmp_qloop_130*tmp_qloop_148; + const real_t q_tmp_2_8 = tmp_qloop_132*tmp_qloop_148; + const real_t q_tmp_2_9 = tmp_qloop_134*tmp_qloop_148; + const real_t q_tmp_2_10 = tmp_qloop_115*tmp_qloop_152; + const real_t q_tmp_2_11 = tmp_qloop_127*tmp_qloop_152; + const real_t q_tmp_2_12 = tmp_qloop_128*tmp_qloop_152; + const real_t q_tmp_2_13 = tmp_qloop_129*tmp_qloop_152; + const real_t q_tmp_2_14 = tmp_qloop_131*tmp_qloop_150; + const real_t q_tmp_2_15 = tmp_qloop_151; + const real_t q_tmp_2_16 = tmp_qloop_138*tmp_qloop_159; + const real_t q_tmp_2_17 = tmp_qloop_130*tmp_qloop_152; + const real_t q_tmp_2_18 = tmp_qloop_132*tmp_qloop_152; + const real_t q_tmp_2_19 = tmp_qloop_134*tmp_qloop_152; + const real_t q_tmp_2_20 = tmp_qloop_115*tmp_qloop_156; + const real_t q_tmp_2_21 = tmp_qloop_127*tmp_qloop_156; + const real_t q_tmp_2_22 = tmp_qloop_128*tmp_qloop_156; + const real_t q_tmp_2_23 = tmp_qloop_129*tmp_qloop_156; + const real_t q_tmp_2_24 = tmp_qloop_131*tmp_qloop_154; + const real_t q_tmp_2_25 = tmp_qloop_155; + const real_t q_tmp_2_26 = tmp_qloop_140*tmp_qloop_159; + const real_t q_tmp_2_27 = tmp_qloop_130*tmp_qloop_156; + const real_t q_tmp_2_28 = tmp_qloop_132*tmp_qloop_156; + const real_t q_tmp_2_29 = tmp_qloop_134*tmp_qloop_156; + const real_t q_tmp_3_0 = tmp_qloop_115*tmp_qloop_145; + const real_t q_tmp_3_1 = tmp_qloop_127*tmp_qloop_145; + const real_t q_tmp_3_2 = tmp_qloop_128*tmp_qloop_145; + const real_t q_tmp_3_3 = tmp_qloop_129*tmp_qloop_145; + const real_t q_tmp_3_4 = tmp_qloop_109*tmp_qloop_160; + const real_t q_tmp_3_5 = tmp_qloop_111*tmp_qloop_160; + const real_t q_tmp_3_6 = tmp_qloop_146; + const real_t q_tmp_3_7 = tmp_qloop_130*tmp_qloop_145; + const real_t q_tmp_3_8 = tmp_qloop_132*tmp_qloop_145; + const real_t q_tmp_3_9 = tmp_qloop_134*tmp_qloop_145; + const real_t q_tmp_3_10 = tmp_qloop_115*tmp_qloop_150; + const real_t q_tmp_3_11 = tmp_qloop_127*tmp_qloop_150; + const real_t q_tmp_3_12 = tmp_qloop_128*tmp_qloop_150; + const real_t q_tmp_3_13 = tmp_qloop_129*tmp_qloop_150; + const real_t q_tmp_3_14 = tmp_qloop_109*tmp_qloop_162; + const real_t q_tmp_3_15 = tmp_qloop_111*tmp_qloop_162; + const real_t q_tmp_3_16 = tmp_qloop_151; + const real_t q_tmp_3_17 = tmp_qloop_130*tmp_qloop_150; + const real_t q_tmp_3_18 = tmp_qloop_132*tmp_qloop_150; + const real_t q_tmp_3_19 = tmp_qloop_134*tmp_qloop_150; + const real_t q_tmp_3_20 = tmp_qloop_115*tmp_qloop_154; + const real_t q_tmp_3_21 = tmp_qloop_127*tmp_qloop_154; + const real_t q_tmp_3_22 = tmp_qloop_128*tmp_qloop_154; + const real_t q_tmp_3_23 = tmp_qloop_129*tmp_qloop_154; + const real_t q_tmp_3_24 = tmp_qloop_109*tmp_qloop_163; + const real_t q_tmp_3_25 = tmp_qloop_111*tmp_qloop_163; + const real_t q_tmp_3_26 = tmp_qloop_155; + const real_t q_tmp_3_27 = tmp_qloop_130*tmp_qloop_154; + const real_t q_tmp_3_28 = tmp_qloop_132*tmp_qloop_154; + const real_t q_tmp_3_29 = tmp_qloop_134*tmp_qloop_154; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_0_12 = q_acc_0_12 + q_tmp_0_12; + q_acc_0_13 = q_acc_0_13 + q_tmp_0_13; + q_acc_0_14 = q_acc_0_14 + q_tmp_0_14; + q_acc_0_15 = q_acc_0_15 + q_tmp_0_15; + q_acc_0_16 = q_acc_0_16 + q_tmp_0_16; + q_acc_0_17 = q_acc_0_17 + q_tmp_0_17; + q_acc_0_18 = q_acc_0_18 + q_tmp_0_18; + q_acc_0_19 = q_acc_0_19 + q_tmp_0_19; + q_acc_0_20 = q_acc_0_20 + q_tmp_0_20; + q_acc_0_21 = q_acc_0_21 + q_tmp_0_21; + q_acc_0_22 = q_acc_0_22 + q_tmp_0_22; + q_acc_0_23 = q_acc_0_23 + q_tmp_0_23; + q_acc_0_24 = q_acc_0_24 + q_tmp_0_24; + q_acc_0_25 = q_acc_0_25 + q_tmp_0_25; + q_acc_0_26 = q_acc_0_26 + q_tmp_0_26; + q_acc_0_27 = q_acc_0_27 + q_tmp_0_27; + q_acc_0_28 = q_acc_0_28 + q_tmp_0_28; + q_acc_0_29 = q_acc_0_29 + q_tmp_0_29; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_1_12 = q_acc_1_12 + q_tmp_1_12; + q_acc_1_13 = q_acc_1_13 + q_tmp_1_13; + q_acc_1_14 = q_acc_1_14 + q_tmp_1_14; + q_acc_1_15 = q_acc_1_15 + q_tmp_1_15; + q_acc_1_16 = q_acc_1_16 + q_tmp_1_16; + q_acc_1_17 = q_acc_1_17 + q_tmp_1_17; + q_acc_1_18 = q_acc_1_18 + q_tmp_1_18; + q_acc_1_19 = q_acc_1_19 + q_tmp_1_19; + q_acc_1_20 = q_acc_1_20 + q_tmp_1_20; + q_acc_1_21 = q_acc_1_21 + q_tmp_1_21; + q_acc_1_22 = q_acc_1_22 + q_tmp_1_22; + q_acc_1_23 = q_acc_1_23 + q_tmp_1_23; + q_acc_1_24 = q_acc_1_24 + q_tmp_1_24; + q_acc_1_25 = q_acc_1_25 + q_tmp_1_25; + q_acc_1_26 = q_acc_1_26 + q_tmp_1_26; + q_acc_1_27 = q_acc_1_27 + q_tmp_1_27; + q_acc_1_28 = q_acc_1_28 + q_tmp_1_28; + q_acc_1_29 = q_acc_1_29 + q_tmp_1_29; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + q_acc_2_12 = q_acc_2_12 + q_tmp_2_12; + q_acc_2_13 = q_acc_2_13 + q_tmp_2_13; + q_acc_2_14 = q_acc_2_14 + q_tmp_2_14; + q_acc_2_15 = q_acc_2_15 + q_tmp_2_15; + q_acc_2_16 = q_acc_2_16 + q_tmp_2_16; + q_acc_2_17 = q_acc_2_17 + q_tmp_2_17; + q_acc_2_18 = q_acc_2_18 + q_tmp_2_18; + q_acc_2_19 = q_acc_2_19 + q_tmp_2_19; + q_acc_2_20 = q_acc_2_20 + q_tmp_2_20; + q_acc_2_21 = q_acc_2_21 + q_tmp_2_21; + q_acc_2_22 = q_acc_2_22 + q_tmp_2_22; + q_acc_2_23 = q_acc_2_23 + q_tmp_2_23; + q_acc_2_24 = q_acc_2_24 + q_tmp_2_24; + q_acc_2_25 = q_acc_2_25 + q_tmp_2_25; + q_acc_2_26 = q_acc_2_26 + q_tmp_2_26; + q_acc_2_27 = q_acc_2_27 + q_tmp_2_27; + q_acc_2_28 = q_acc_2_28 + q_tmp_2_28; + q_acc_2_29 = q_acc_2_29 + q_tmp_2_29; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_3_6 = q_acc_3_6 + q_tmp_3_6; + q_acc_3_7 = q_acc_3_7 + q_tmp_3_7; + q_acc_3_8 = q_acc_3_8 + q_tmp_3_8; + q_acc_3_9 = q_acc_3_9 + q_tmp_3_9; + q_acc_3_10 = q_acc_3_10 + q_tmp_3_10; + q_acc_3_11 = q_acc_3_11 + q_tmp_3_11; + q_acc_3_12 = q_acc_3_12 + q_tmp_3_12; + q_acc_3_13 = q_acc_3_13 + q_tmp_3_13; + q_acc_3_14 = q_acc_3_14 + q_tmp_3_14; + q_acc_3_15 = q_acc_3_15 + q_tmp_3_15; + q_acc_3_16 = q_acc_3_16 + q_tmp_3_16; + q_acc_3_17 = q_acc_3_17 + q_tmp_3_17; + q_acc_3_18 = q_acc_3_18 + q_tmp_3_18; + q_acc_3_19 = q_acc_3_19 + q_tmp_3_19; + q_acc_3_20 = q_acc_3_20 + q_tmp_3_20; + q_acc_3_21 = q_acc_3_21 + q_tmp_3_21; + q_acc_3_22 = q_acc_3_22 + q_tmp_3_22; + q_acc_3_23 = q_acc_3_23 + q_tmp_3_23; + q_acc_3_24 = q_acc_3_24 + q_tmp_3_24; + q_acc_3_25 = q_acc_3_25 + q_tmp_3_25; + q_acc_3_26 = q_acc_3_26 + q_tmp_3_26; + q_acc_3_27 = q_acc_3_27 + q_tmp_3_27; + q_acc_3_28 = q_acc_3_28 + q_tmp_3_28; + q_acc_3_29 = q_acc_3_29 + q_tmp_3_29; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_0 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + } + } + } + const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP; + const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP; + const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP; + const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP; + const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP; + const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP; + const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP; + const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP; + const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP; + const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP; + const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP; + const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP; + const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP; + const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP; + const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP; + const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP; + const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP; + const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP; + const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP; + const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP; + const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP; + const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP; + const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP; + const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP; + const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP; + const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP; + const real_t tmp_coords_jac_12_BLUE_UP = jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP; + const real_t tmp_coords_jac_13_BLUE_UP = 1.0 / (tmp_coords_jac_12_BLUE_UP); + const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP); + const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP); + const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP); + const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP); + const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP); + const real_t abs_det_jac_affine_BLUE_UP = abs(tmp_coords_jac_12_BLUE_UP); + { + /* CellType.BLUE_UP */ + for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) + for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 5; q += 1) + { + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_0); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_18); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_2),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_2),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_2),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_2); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_1),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_1); + const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_15,tmp_qloop_15,tmp_qloop_15,tmp_qloop_15))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_set_pd(tmp_qloop_3,tmp_qloop_3,tmp_qloop_3,tmp_qloop_3))),_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_25); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_22); + const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(tmp_qloop_19,tmp_qloop_29),tmp_qloop_30); + const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)); + const __m256d tmp_qloop_35 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_34); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_35),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_37 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31)); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)); + const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_43); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_47); + const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),tmp_qloop_48); + const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_49); + const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_47)); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_51); + const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53)),tmp_qloop_28); + const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_54)); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)); + const __m256d tmp_qloop_57 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_56); + const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_57)); + const __m256d tmp_qloop_66 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31),tmp_qloop_31)),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_66); + const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_73 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)); + const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)),tmp_qloop_27); + const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),tmp_qloop_74); + const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_76); + const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_77); + const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_43); + const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_83 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))); + const __m256d tmp_qloop_84 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_85 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_22); + const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_43); + const __m256d tmp_qloop_87 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)),_mm256_set_pd(tmp_qloop_32,tmp_qloop_32,tmp_qloop_32,tmp_qloop_32))); + const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_43); + const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_76); + const __m256d tmp_qloop_90 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_89); + const __m256d tmp_qloop_92 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_85,tmp_qloop_89); + const __m256d tmp_qloop_94 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)); + const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_76); + const __m256d tmp_qloop_96 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))); + const __m256d tmp_qloop_97 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_76); + const __m256d tmp_qloop_98 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))); + const __m256d tmp_qloop_99 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_97); + const __m256d tmp_qloop_100 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_99); + const __m256d tmp_qloop_101 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),tmp_qloop_74); + const __m256d tmp_qloop_102 = _mm256_mul_pd(tmp_qloop_58,tmp_qloop_76); + const __m256d tmp_qloop_103 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_104 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_105 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_106 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_107 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_108 = _mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_109 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_110 = _mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_111 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_112 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_113 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_114 = _mm256_add_pd(tmp_qloop_112,tmp_qloop_113); + const __m256d tmp_qloop_115 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_104),tmp_qloop_106),tmp_qloop_108),tmp_qloop_110),tmp_qloop_114); + const __m256d tmp_qloop_116 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_117 = _mm256_add_pd(tmp_qloop_109,tmp_qloop_116); + const __m256d tmp_qloop_118 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_109),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_119 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_111),tmp_qloop_117)); + const __m256d tmp_qloop_120 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_116),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_119); + const __m256d tmp_qloop_121 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_111)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_117,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_109)),tmp_qloop_118),tmp_qloop_120); + const __m256d tmp_qloop_122 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_111); + const __m256d tmp_qloop_123 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_111),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_124 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_109)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_116,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_111)),tmp_qloop_120),tmp_qloop_123); + const __m256d tmp_qloop_125 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_116)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_109)),_mm256_mul_pd(rho_dof_5,tmp_qloop_111)),tmp_qloop_118),tmp_qloop_119),tmp_qloop_123); + const __m256d tmp_qloop_127 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_104); + const __m256d tmp_qloop_128 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_106); + const __m256d tmp_qloop_129 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_108); + const __m256d tmp_qloop_130 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_113,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_116); + const __m256d tmp_qloop_131 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_132 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_112,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_131,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_109); + const __m256d tmp_qloop_133 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_134 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_133,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111); + const __m256d tmp_qloop_158 = _mm256_mul_pd(tmp_qloop_105,tmp_qloop_111); + const __m256d jac_blending_0_0 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_38); + const __m256d jac_blending_0_1 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_44); + const __m256d jac_blending_0_2 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_50); + const __m256d jac_blending_1_0 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_52); + const __m256d jac_blending_1_1 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_55); + const __m256d tmp_qloop_63 = _mm256_mul_pd(jac_blending_0_2,jac_blending_1_1); + const __m256d jac_blending_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50); + const __m256d tmp_qloop_60 = _mm256_mul_pd(jac_blending_0_1,jac_blending_1_2); + const __m256d jac_blending_2_0 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_52); + const __m256d jac_blending_2_1 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_44); + const __m256d tmp_qloop_61 = _mm256_mul_pd(jac_blending_1_2,jac_blending_2_1); + const __m256d jac_blending_2_2 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_58); + const __m256d tmp_qloop_59 = _mm256_mul_pd(jac_blending_1_1,jac_blending_2_2); + const __m256d tmp_qloop_62 = _mm256_mul_pd(jac_blending_0_1,jac_blending_2_2); + const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_2_0,tmp_qloop_60)),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_61),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_62),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_2_0,tmp_qloop_63),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),jac_blending_2_1)); + const __m256d tmp_qloop_65 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_64); + const __m256d abs_det_jac_blending = tmp_qloop_64; + const __m256d tmp_qloop_135 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_115),_mm256_mul_pd(rho_dof_1,tmp_qloop_127)),_mm256_mul_pd(rho_dof_2,tmp_qloop_128)),_mm256_mul_pd(rho_dof_3,tmp_qloop_129)),_mm256_mul_pd(rho_dof_4,tmp_qloop_110)),_mm256_mul_pd(rho_dof_5,tmp_qloop_113)),_mm256_mul_pd(rho_dof_6,tmp_qloop_112)),_mm256_mul_pd(rho_dof_7,tmp_qloop_130)),_mm256_mul_pd(rho_dof_8,tmp_qloop_132)),_mm256_mul_pd(rho_dof_9,tmp_qloop_134)))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE_UP,abs_det_jac_affine_BLUE_UP,abs_det_jac_affine_BLUE_UP,abs_det_jac_affine_BLUE_UP)); + const __m256d tmp_qloop_136 = _mm256_mul_pd(tmp_qloop_135,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_142 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_144 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_147 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_159 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_158); + const __m256d tmp_qloop_161 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_135); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_59)); + const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(jac_blending_0_2,jac_blending_2_1))); + const __m256d jac_blending_inv_0_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_60)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_2,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_2),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_1_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_2_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_1,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_126 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP))))); + const __m256d tmp_qloop_137 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_136); + const __m256d tmp_qloop_143 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_142); + const __m256d tmp_qloop_145 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_144); + const __m256d tmp_qloop_146 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_145); + const __m256d tmp_qloop_148 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_147); + const __m256d tmp_qloop_157 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_135); + const __m256d tmp_qloop_160 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_157); + const __m256d jac_blending_inv_2_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_138 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP))))); + const __m256d tmp_qloop_139 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_138); + const __m256d tmp_qloop_149 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_142); + const __m256d tmp_qloop_150 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_144); + const __m256d tmp_qloop_151 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_150); + const __m256d tmp_qloop_152 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_147); + const __m256d tmp_qloop_162 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_161); + const __m256d jac_blending_inv_2_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_140 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP))))); + const __m256d tmp_qloop_141 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_140); + const __m256d tmp_qloop_153 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_142); + const __m256d tmp_qloop_154 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_144); + const __m256d tmp_qloop_155 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_154); + const __m256d tmp_qloop_156 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_147); + const __m256d tmp_qloop_163 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_161); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33))),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),_mm256_mul_pd(tmp_qloop_35,tmp_qloop_69)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,tmp_qloop_69),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_71)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_22,tmp_qloop_71)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_81),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52); + const __m256d hessian_blending_2_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_83),tmp_qloop_84); + const __m256d hessian_blending_0_0_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_86),tmp_qloop_84); + const __m256d hessian_blending_2_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_86),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52); + const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_87,tmp_qloop_88),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_88,tmp_qloop_92)); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53))),_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))),_mm256_mul_pd(tmp_qloop_47,tmp_qloop_79)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_79))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_82)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_1_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_87),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_1_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_86,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d hessian_blending_0_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_88,tmp_qloop_96),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50); + const __m256d hessian_blending_1_2_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_98),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_2_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_85,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_18),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_2_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_96),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_98),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50); + const __m256d hessian_blending_2_2_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_99),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_25),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_18),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_25),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_79)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))),_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_82))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_137); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_137); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_137); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_137); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_137); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_137); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_137); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_137); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_137); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_137); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_139); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_139); + const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_139); + const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_139); + const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_139); + const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_139); + const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_139); + const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_139); + const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_139); + const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_139); + const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_141); + const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_141); + const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_141); + const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_141); + const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_141); + const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_141); + const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_141); + const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_141); + const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_141); + const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_141); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_143); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_143); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_143); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_143); + const __m256d q_tmp_1_4 = tmp_qloop_146; + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_145); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_148); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_143); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_143); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_143); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_149); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_149); + const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_149); + const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_149); + const __m256d q_tmp_1_14 = tmp_qloop_151; + const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_150); + const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_152); + const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_149); + const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_149); + const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_149); + const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_153); + const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_153); + const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_153); + const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_153); + const __m256d q_tmp_1_24 = tmp_qloop_155; + const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_154); + const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_156); + const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_153); + const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_153); + const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_153); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_148); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_148); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_148); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_148); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_145); + const __m256d q_tmp_2_5 = tmp_qloop_146; + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_157,tmp_qloop_158); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_148); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_148); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_148); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_152); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_152); + const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_152); + const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_152); + const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_150); + const __m256d q_tmp_2_15 = tmp_qloop_151; + const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_159); + const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_152); + const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_152); + const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_152); + const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_156); + const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_156); + const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_156); + const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_156); + const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_154); + const __m256d q_tmp_2_25 = tmp_qloop_155; + const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_159); + const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_156); + const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_156); + const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_156); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_145); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_145); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_145); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_145); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_160); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_160); + const __m256d q_tmp_3_6 = tmp_qloop_146; + const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_145); + const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_145); + const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_145); + const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_150); + const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_150); + const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_150); + const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_150); + const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_162); + const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_162); + const __m256d q_tmp_3_16 = tmp_qloop_151; + const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_150); + const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_150); + const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_150); + const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_154); + const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_154); + const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_154); + const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_154); + const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_163); + const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_163); + const __m256d q_tmp_3_26 = tmp_qloop_155; + const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_154); + const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_154); + const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_154); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12); + q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13); + q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14); + q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15); + q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16); + q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17); + q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18); + q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19); + q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20); + q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21); + q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22); + q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23); + q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24); + q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25); + q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26); + q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27); + q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28); + q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12); + q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13); + q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14); + q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15); + q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16); + q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17); + q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18); + q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19); + q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20); + q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21); + q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22); + q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23); + q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24); + q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25); + q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26); + q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27); + q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28); + q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12); + q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13); + q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14); + q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15); + q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16); + q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17); + q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18); + q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19); + q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20); + q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21); + q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22); + q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23); + q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24); + q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25); + q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26); + q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27); + q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28); + q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6); + q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7); + q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8); + q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9); + q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10); + q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11); + q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12); + q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13); + q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14); + q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15); + q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16); + q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17); + q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18); + q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19); + q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20); + q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21); + q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22); + q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23); + q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24); + q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25); + q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26); + q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27); + q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28); + q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_6 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_7 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_16 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_17 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_26 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_27 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_0_12 = 0.0; + real_t q_acc_0_13 = 0.0; + real_t q_acc_0_14 = 0.0; + real_t q_acc_0_15 = 0.0; + real_t q_acc_0_16 = 0.0; + real_t q_acc_0_17 = 0.0; + real_t q_acc_0_18 = 0.0; + real_t q_acc_0_19 = 0.0; + real_t q_acc_0_20 = 0.0; + real_t q_acc_0_21 = 0.0; + real_t q_acc_0_22 = 0.0; + real_t q_acc_0_23 = 0.0; + real_t q_acc_0_24 = 0.0; + real_t q_acc_0_25 = 0.0; + real_t q_acc_0_26 = 0.0; + real_t q_acc_0_27 = 0.0; + real_t q_acc_0_28 = 0.0; + real_t q_acc_0_29 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_1_12 = 0.0; + real_t q_acc_1_13 = 0.0; + real_t q_acc_1_14 = 0.0; + real_t q_acc_1_15 = 0.0; + real_t q_acc_1_16 = 0.0; + real_t q_acc_1_17 = 0.0; + real_t q_acc_1_18 = 0.0; + real_t q_acc_1_19 = 0.0; + real_t q_acc_1_20 = 0.0; + real_t q_acc_1_21 = 0.0; + real_t q_acc_1_22 = 0.0; + real_t q_acc_1_23 = 0.0; + real_t q_acc_1_24 = 0.0; + real_t q_acc_1_25 = 0.0; + real_t q_acc_1_26 = 0.0; + real_t q_acc_1_27 = 0.0; + real_t q_acc_1_28 = 0.0; + real_t q_acc_1_29 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + real_t q_acc_2_12 = 0.0; + real_t q_acc_2_13 = 0.0; + real_t q_acc_2_14 = 0.0; + real_t q_acc_2_15 = 0.0; + real_t q_acc_2_16 = 0.0; + real_t q_acc_2_17 = 0.0; + real_t q_acc_2_18 = 0.0; + real_t q_acc_2_19 = 0.0; + real_t q_acc_2_20 = 0.0; + real_t q_acc_2_21 = 0.0; + real_t q_acc_2_22 = 0.0; + real_t q_acc_2_23 = 0.0; + real_t q_acc_2_24 = 0.0; + real_t q_acc_2_25 = 0.0; + real_t q_acc_2_26 = 0.0; + real_t q_acc_2_27 = 0.0; + real_t q_acc_2_28 = 0.0; + real_t q_acc_2_29 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_3_6 = 0.0; + real_t q_acc_3_7 = 0.0; + real_t q_acc_3_8 = 0.0; + real_t q_acc_3_9 = 0.0; + real_t q_acc_3_10 = 0.0; + real_t q_acc_3_11 = 0.0; + real_t q_acc_3_12 = 0.0; + real_t q_acc_3_13 = 0.0; + real_t q_acc_3_14 = 0.0; + real_t q_acc_3_15 = 0.0; + real_t q_acc_3_16 = 0.0; + real_t q_acc_3_17 = 0.0; + real_t q_acc_3_18 = 0.0; + real_t q_acc_3_19 = 0.0; + real_t q_acc_3_20 = 0.0; + real_t q_acc_3_21 = 0.0; + real_t q_acc_3_22 = 0.0; + real_t q_acc_3_23 = 0.0; + real_t q_acc_3_24 = 0.0; + real_t q_acc_3_25 = 0.0; + real_t q_acc_3_26 = 0.0; + real_t q_acc_3_27 = 0.0; + real_t q_acc_3_28 = 0.0; + real_t q_acc_3_29 = 0.0; + for (int64_t q = 0; q < 5; q += 1) + { + const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; + const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); + const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; + const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; + const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; + const real_t tmp_qloop_25 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q] + (-p_affine_0_1 + p_affine_3_1)*_data_q_p_2[q]; + const real_t tmp_qloop_26 = -tmp_qloop_25 - tmp_qloop_8; + const real_t tmp_qloop_27 = tmp_qloop_21*(tmp_qloop_10*tmp_qloop_23*tmp_qloop_9 + tmp_qloop_13*tmp_qloop_24*tmp_qloop_6 - tmp_qloop_14*tmp_qloop_24 - tmp_qloop_15*tmp_qloop_26 + tmp_qloop_2*tmp_qloop_26*tmp_qloop_3 - tmp_qloop_23*tmp_qloop_7); + const real_t tmp_qloop_28 = tmp_qloop_20 - tmp_qloop_27; + const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); + const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); + const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; + const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); + const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); + const real_t tmp_qloop_38 = tmp_qloop_37*1.0; + const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; + const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; + const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; + const real_t tmp_qloop_47 = -tmp_qloop_28; + const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; + const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; + const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; + const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; + const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; + const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; + const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; + const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; + const real_t tmp_qloop_57 = tmp_qloop_28 + tmp_qloop_56; + const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; + const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; + const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; + const real_t tmp_qloop_69 = tmp_qloop_18*2.0; + const real_t tmp_qloop_70 = -tmp_qloop_41; + const real_t tmp_qloop_71 = tmp_qloop_35*2.0; + const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; + const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; + const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; + const real_t tmp_qloop_76 = tmp_qloop_17*tmp_qloop_66; + const real_t tmp_qloop_77 = tmp_qloop_51*tmp_qloop_76; + const real_t tmp_qloop_78 = tmp_qloop_25*tmp_qloop_77; + const real_t tmp_qloop_79 = tmp_qloop_25*2.0; + const real_t tmp_qloop_80 = tmp_qloop_18*tmp_qloop_40 + tmp_qloop_33*tmp_qloop_79; + const real_t tmp_qloop_81 = tmp_qloop_25*tmp_qloop_43; + const real_t tmp_qloop_82 = tmp_qloop_22*2.0; + const real_t tmp_qloop_83 = tmp_qloop_18*tmp_qloop_72 + tmp_qloop_33*tmp_qloop_82; + const real_t tmp_qloop_84 = -tmp_qloop_22*tmp_qloop_78; + const real_t tmp_qloop_85 = tmp_qloop_18*tmp_qloop_22; + const real_t tmp_qloop_86 = tmp_qloop_22*tmp_qloop_43; + const real_t tmp_qloop_87 = tmp_qloop_21*tmp_qloop_25*tmp_qloop_32 - tmp_qloop_40*tmp_qloop_69; + const real_t tmp_qloop_88 = tmp_qloop_18*tmp_qloop_43; + const real_t tmp_qloop_89 = tmp_qloop_42*tmp_qloop_76; + const real_t tmp_qloop_90 = -tmp_qloop_25*tmp_qloop_40 - tmp_qloop_28; + const real_t tmp_qloop_91 = tmp_qloop_25*tmp_qloop_89; + const real_t tmp_qloop_92 = tmp_qloop_25*tmp_qloop_72 - tmp_qloop_40*tmp_qloop_82; + const real_t tmp_qloop_93 = tmp_qloop_85*tmp_qloop_89; + const real_t tmp_qloop_94 = -tmp_qloop_31*tmp_qloop_68; + const real_t tmp_qloop_95 = tmp_qloop_55*tmp_qloop_76; + const real_t tmp_qloop_96 = tmp_qloop_22*tmp_qloop_68 + tmp_qloop_46*tmp_qloop_69; + const real_t tmp_qloop_97 = tmp_qloop_49*tmp_qloop_76; + const real_t tmp_qloop_98 = tmp_qloop_22*tmp_qloop_40 + tmp_qloop_46*tmp_qloop_79; + const real_t tmp_qloop_99 = tmp_qloop_25*tmp_qloop_97; + const real_t tmp_qloop_100 = tmp_qloop_18*tmp_qloop_99; + const real_t tmp_qloop_101 = tmp_qloop_22*tmp_qloop_72 + tmp_qloop_56*2.0 + tmp_qloop_74; + const real_t tmp_qloop_102 = tmp_qloop_58*tmp_qloop_76; + const real_t tmp_qloop_103 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_104 = tmp_qloop_103*2.0; + const real_t tmp_qloop_105 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_106 = tmp_qloop_105*2.0; + const real_t tmp_qloop_107 = (_data_q_p_2[q]*_data_q_p_2[q]); + const real_t tmp_qloop_108 = tmp_qloop_107*2.0; + const real_t tmp_qloop_109 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_110 = tmp_qloop_109*_data_q_p_2[q]; + const real_t tmp_qloop_111 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_112 = tmp_qloop_111*_data_q_p_1[q]; + const real_t tmp_qloop_113 = tmp_qloop_111*_data_q_p_2[q]; + const real_t tmp_qloop_114 = tmp_qloop_112 + tmp_qloop_113; + const real_t tmp_qloop_115 = tmp_qloop_104 + tmp_qloop_106 + tmp_qloop_108 + tmp_qloop_110 + tmp_qloop_114 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0; + const real_t tmp_qloop_116 = 4.0*_data_q_p_2[q]; + const real_t tmp_qloop_117 = tmp_qloop_109 + tmp_qloop_116; + const real_t tmp_qloop_118 = -rho_dof_8*tmp_qloop_109; + const real_t tmp_qloop_119 = rho_dof_0*(tmp_qloop_111 + tmp_qloop_117 - 3.0); + const real_t tmp_qloop_120 = -rho_dof_7*tmp_qloop_116 + tmp_qloop_119; + const real_t tmp_qloop_121 = rho_dof_1*(tmp_qloop_111 - 1.0) + rho_dof_5*tmp_qloop_116 + rho_dof_6*tmp_qloop_109 + rho_dof_9*(-tmp_qloop_117 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_118 + tmp_qloop_120; + const real_t tmp_qloop_122 = tmp_qloop_111 - 4.0; + const real_t tmp_qloop_123 = -rho_dof_9*tmp_qloop_111; + const real_t tmp_qloop_124 = rho_dof_2*(tmp_qloop_109 - 1.0) + rho_dof_4*tmp_qloop_116 + rho_dof_6*tmp_qloop_111 + rho_dof_8*(-tmp_qloop_116 - tmp_qloop_122 - 8.0*_data_q_p_1[q]) + tmp_qloop_120 + tmp_qloop_123; + const real_t tmp_qloop_125 = rho_dof_3*(tmp_qloop_116 - 1.0) + rho_dof_4*tmp_qloop_109 + rho_dof_5*tmp_qloop_111 + rho_dof_7*(-tmp_qloop_109 - tmp_qloop_122 - 8.0*_data_q_p_2[q]) + tmp_qloop_118 + tmp_qloop_119 + tmp_qloop_123; + const real_t tmp_qloop_127 = tmp_qloop_104 - _data_q_p_0[q]; + const real_t tmp_qloop_128 = tmp_qloop_106 - _data_q_p_1[q]; + const real_t tmp_qloop_129 = tmp_qloop_108 - _data_q_p_2[q]; + const real_t tmp_qloop_130 = tmp_qloop_107*-4.0 - tmp_qloop_110 - tmp_qloop_113 + tmp_qloop_116; + const real_t tmp_qloop_131 = tmp_qloop_105*4.0; + const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; + const real_t tmp_qloop_133 = tmp_qloop_103*4.0; + const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; + const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; + const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; + const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; + const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; + const real_t jac_blending_1_0 = tmp_qloop_25*tmp_qloop_52; + const real_t jac_blending_1_1 = tmp_qloop_43*tmp_qloop_55; + const real_t tmp_qloop_63 = jac_blending_0_2*jac_blending_1_1; + const real_t jac_blending_1_2 = tmp_qloop_25*tmp_qloop_50; + const real_t tmp_qloop_60 = jac_blending_0_1*jac_blending_1_2; + const real_t jac_blending_2_0 = tmp_qloop_22*tmp_qloop_52; + const real_t jac_blending_2_1 = tmp_qloop_22*tmp_qloop_44; + const real_t tmp_qloop_61 = jac_blending_1_2*jac_blending_2_1; + const real_t jac_blending_2_2 = tmp_qloop_43*tmp_qloop_58; + const real_t tmp_qloop_59 = jac_blending_1_1*jac_blending_2_2; + const real_t tmp_qloop_62 = jac_blending_0_1*jac_blending_2_2; + const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; + const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); + const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_BLUE_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; + const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); + const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); + const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); + const real_t jac_blending_inv_1_0 = tmp_qloop_65*(-jac_blending_1_0*jac_blending_2_2 + jac_blending_1_2*jac_blending_2_0); + const real_t jac_blending_inv_1_1 = tmp_qloop_65*(jac_blending_0_0*jac_blending_2_2 - jac_blending_0_2*jac_blending_2_0); + const real_t jac_blending_inv_1_2 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_1_2 + jac_blending_0_2*jac_blending_1_0); + const real_t jac_blending_inv_2_0 = tmp_qloop_65*(jac_blending_1_0*jac_blending_2_1 - jac_blending_1_1*jac_blending_2_0); + const real_t tmp_qloop_126 = tmp_qloop_121*(jac_affine_inv_0_0_BLUE_UP*jac_blending_inv_0_0 + jac_affine_inv_0_1_BLUE_UP*jac_blending_inv_1_0 + jac_affine_inv_0_2_BLUE_UP*jac_blending_inv_2_0) + tmp_qloop_124*(jac_affine_inv_1_0_BLUE_UP*jac_blending_inv_0_0 + jac_affine_inv_1_1_BLUE_UP*jac_blending_inv_1_0 + jac_affine_inv_1_2_BLUE_UP*jac_blending_inv_2_0) + tmp_qloop_125*(jac_affine_inv_2_0_BLUE_UP*jac_blending_inv_0_0 + jac_affine_inv_2_1_BLUE_UP*jac_blending_inv_1_0 + jac_affine_inv_2_2_BLUE_UP*jac_blending_inv_2_0); + const real_t tmp_qloop_137 = tmp_qloop_126*tmp_qloop_136; + const real_t tmp_qloop_143 = tmp_qloop_126*tmp_qloop_142; + const real_t tmp_qloop_145 = tmp_qloop_126*tmp_qloop_144; + const real_t tmp_qloop_146 = tmp_qloop_112*tmp_qloop_145; + const real_t tmp_qloop_148 = tmp_qloop_126*tmp_qloop_147; + const real_t tmp_qloop_157 = tmp_qloop_126*tmp_qloop_135; + const real_t tmp_qloop_160 = tmp_qloop_107*tmp_qloop_157; + const real_t jac_blending_inv_2_1 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_2_1 + jac_blending_0_1*jac_blending_2_0); + const real_t tmp_qloop_138 = tmp_qloop_121*(jac_affine_inv_0_0_BLUE_UP*jac_blending_inv_0_1 + jac_affine_inv_0_1_BLUE_UP*jac_blending_inv_1_1 + jac_affine_inv_0_2_BLUE_UP*jac_blending_inv_2_1) + tmp_qloop_124*(jac_affine_inv_1_0_BLUE_UP*jac_blending_inv_0_1 + jac_affine_inv_1_1_BLUE_UP*jac_blending_inv_1_1 + jac_affine_inv_1_2_BLUE_UP*jac_blending_inv_2_1) + tmp_qloop_125*(jac_affine_inv_2_0_BLUE_UP*jac_blending_inv_0_1 + jac_affine_inv_2_1_BLUE_UP*jac_blending_inv_1_1 + jac_affine_inv_2_2_BLUE_UP*jac_blending_inv_2_1); + const real_t tmp_qloop_139 = tmp_qloop_136*tmp_qloop_138; + const real_t tmp_qloop_149 = tmp_qloop_138*tmp_qloop_142; + const real_t tmp_qloop_150 = tmp_qloop_138*tmp_qloop_144; + const real_t tmp_qloop_151 = tmp_qloop_112*tmp_qloop_150; + const real_t tmp_qloop_152 = tmp_qloop_138*tmp_qloop_147; + const real_t tmp_qloop_162 = tmp_qloop_138*tmp_qloop_161; + const real_t jac_blending_inv_2_2 = tmp_qloop_65*(jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0); + const real_t tmp_qloop_140 = tmp_qloop_121*(jac_affine_inv_0_0_BLUE_UP*jac_blending_inv_0_2 + jac_affine_inv_0_1_BLUE_UP*jac_blending_inv_1_2 + jac_affine_inv_0_2_BLUE_UP*jac_blending_inv_2_2) + tmp_qloop_124*(jac_affine_inv_1_0_BLUE_UP*jac_blending_inv_0_2 + jac_affine_inv_1_1_BLUE_UP*jac_blending_inv_1_2 + jac_affine_inv_1_2_BLUE_UP*jac_blending_inv_2_2) + tmp_qloop_125*(jac_affine_inv_2_0_BLUE_UP*jac_blending_inv_0_2 + jac_affine_inv_2_1_BLUE_UP*jac_blending_inv_1_2 + jac_affine_inv_2_2_BLUE_UP*jac_blending_inv_2_2); + const real_t tmp_qloop_141 = tmp_qloop_136*tmp_qloop_140; + const real_t tmp_qloop_153 = tmp_qloop_140*tmp_qloop_142; + const real_t tmp_qloop_154 = tmp_qloop_140*tmp_qloop_144; + const real_t tmp_qloop_155 = tmp_qloop_112*tmp_qloop_154; + const real_t tmp_qloop_156 = tmp_qloop_140*tmp_qloop_147; + const real_t tmp_qloop_163 = tmp_qloop_140*tmp_qloop_161; + const real_t hessian_blending_0_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_68 - tmp_qloop_28*tmp_qloop_69 + tmp_qloop_31*(tmp_qloop_33 - tmp_qloop_68) + tmp_qloop_35*tmp_qloop_69)*1.0 - tmp_qloop_18*tmp_qloop_67; + const real_t hessian_blending_1_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_40 + tmp_qloop_25*tmp_qloop_71 + tmp_qloop_70)*1.0 - tmp_qloop_25*tmp_qloop_67; + const real_t hessian_blending_2_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_72 + tmp_qloop_22*tmp_qloop_71 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_67; + const real_t hessian_blending_0_0_1 = tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_18*tmp_qloop_78; + const real_t hessian_blending_1_0_1 = -tmp_qloop_29*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_80*tmp_qloop_81; + const real_t hessian_blending_2_0_1 = tmp_qloop_81*tmp_qloop_83 + tmp_qloop_84; + const real_t hessian_blending_0_0_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_77*tmp_qloop_85; + const real_t hessian_blending_1_0_2 = tmp_qloop_80*tmp_qloop_86 + tmp_qloop_84; + const real_t hessian_blending_2_0_2 = -tmp_qloop_30*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_83*tmp_qloop_86; + const real_t hessian_blending_0_1_0 = -tmp_qloop_19*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_87*tmp_qloop_88; + const real_t hessian_blending_1_1_0 = tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_18*tmp_qloop_91; + const real_t hessian_blending_2_1_0 = tmp_qloop_88*tmp_qloop_92 - tmp_qloop_93; + const real_t hessian_blending_0_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_68 + tmp_qloop_54*tmp_qloop_69 + tmp_qloop_94)*1.0 - tmp_qloop_18*tmp_qloop_95; + const real_t hessian_blending_1_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_40 + tmp_qloop_31*(-tmp_qloop_40 + tmp_qloop_53) + tmp_qloop_47*tmp_qloop_79 + tmp_qloop_54*tmp_qloop_79)*1.0 - tmp_qloop_25*tmp_qloop_95; + const real_t hessian_blending_2_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_72 + tmp_qloop_54*tmp_qloop_82 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_95; + const real_t hessian_blending_0_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_87*1.0 - tmp_qloop_93; + const real_t hessian_blending_1_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_22*tmp_qloop_91; + const real_t hessian_blending_2_1_2 = -tmp_qloop_30*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_86*tmp_qloop_92; + const real_t hessian_blending_0_2_0 = -tmp_qloop_19*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_88*tmp_qloop_96; + const real_t hessian_blending_1_2_0 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_98*1.0; + const real_t hessian_blending_2_2_0 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*1.0 - tmp_qloop_85*tmp_qloop_97; + const real_t hessian_blending_0_2_1 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_96*1.0; + const real_t hessian_blending_1_2_1 = -tmp_qloop_29*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_81*tmp_qloop_98; + const real_t hessian_blending_2_2_1 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*1.0 - tmp_qloop_22*tmp_qloop_99; + const real_t hessian_blending_0_2_2 = -tmp_qloop_102*tmp_qloop_18 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_68 + tmp_qloop_57*tmp_qloop_69 + tmp_qloop_94)*1.0; + const real_t hessian_blending_1_2_2 = -tmp_qloop_102*tmp_qloop_25 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_40 + tmp_qloop_57*tmp_qloop_79 + tmp_qloop_70)*1.0; + const real_t hessian_blending_2_2_2 = -tmp_qloop_102*tmp_qloop_22 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_72 + tmp_qloop_31*(tmp_qloop_46 - tmp_qloop_72) + tmp_qloop_48*2.0 + tmp_qloop_57*tmp_qloop_82)*1.0; + const real_t q_tmp_0_0 = tmp_qloop_115*tmp_qloop_137; + const real_t q_tmp_0_1 = tmp_qloop_127*tmp_qloop_137; + const real_t q_tmp_0_2 = tmp_qloop_128*tmp_qloop_137; + const real_t q_tmp_0_3 = tmp_qloop_129*tmp_qloop_137; + const real_t q_tmp_0_4 = tmp_qloop_110*tmp_qloop_137; + const real_t q_tmp_0_5 = tmp_qloop_113*tmp_qloop_137; + const real_t q_tmp_0_6 = tmp_qloop_112*tmp_qloop_137; + const real_t q_tmp_0_7 = tmp_qloop_130*tmp_qloop_137; + const real_t q_tmp_0_8 = tmp_qloop_132*tmp_qloop_137; + const real_t q_tmp_0_9 = tmp_qloop_134*tmp_qloop_137; + const real_t q_tmp_0_10 = tmp_qloop_115*tmp_qloop_139; + const real_t q_tmp_0_11 = tmp_qloop_127*tmp_qloop_139; + const real_t q_tmp_0_12 = tmp_qloop_128*tmp_qloop_139; + const real_t q_tmp_0_13 = tmp_qloop_129*tmp_qloop_139; + const real_t q_tmp_0_14 = tmp_qloop_110*tmp_qloop_139; + const real_t q_tmp_0_15 = tmp_qloop_113*tmp_qloop_139; + const real_t q_tmp_0_16 = tmp_qloop_112*tmp_qloop_139; + const real_t q_tmp_0_17 = tmp_qloop_130*tmp_qloop_139; + const real_t q_tmp_0_18 = tmp_qloop_132*tmp_qloop_139; + const real_t q_tmp_0_19 = tmp_qloop_134*tmp_qloop_139; + const real_t q_tmp_0_20 = tmp_qloop_115*tmp_qloop_141; + const real_t q_tmp_0_21 = tmp_qloop_127*tmp_qloop_141; + const real_t q_tmp_0_22 = tmp_qloop_128*tmp_qloop_141; + const real_t q_tmp_0_23 = tmp_qloop_129*tmp_qloop_141; + const real_t q_tmp_0_24 = tmp_qloop_110*tmp_qloop_141; + const real_t q_tmp_0_25 = tmp_qloop_113*tmp_qloop_141; + const real_t q_tmp_0_26 = tmp_qloop_112*tmp_qloop_141; + const real_t q_tmp_0_27 = tmp_qloop_130*tmp_qloop_141; + const real_t q_tmp_0_28 = tmp_qloop_132*tmp_qloop_141; + const real_t q_tmp_0_29 = tmp_qloop_134*tmp_qloop_141; + const real_t q_tmp_1_0 = tmp_qloop_115*tmp_qloop_143; + const real_t q_tmp_1_1 = tmp_qloop_127*tmp_qloop_143; + const real_t q_tmp_1_2 = tmp_qloop_128*tmp_qloop_143; + const real_t q_tmp_1_3 = tmp_qloop_129*tmp_qloop_143; + const real_t q_tmp_1_4 = tmp_qloop_146; + const real_t q_tmp_1_5 = tmp_qloop_133*tmp_qloop_145; + const real_t q_tmp_1_6 = tmp_qloop_133*tmp_qloop_148; + const real_t q_tmp_1_7 = tmp_qloop_130*tmp_qloop_143; + const real_t q_tmp_1_8 = tmp_qloop_132*tmp_qloop_143; + const real_t q_tmp_1_9 = tmp_qloop_134*tmp_qloop_143; + const real_t q_tmp_1_10 = tmp_qloop_115*tmp_qloop_149; + const real_t q_tmp_1_11 = tmp_qloop_127*tmp_qloop_149; + const real_t q_tmp_1_12 = tmp_qloop_128*tmp_qloop_149; + const real_t q_tmp_1_13 = tmp_qloop_129*tmp_qloop_149; + const real_t q_tmp_1_14 = tmp_qloop_151; + const real_t q_tmp_1_15 = tmp_qloop_133*tmp_qloop_150; + const real_t q_tmp_1_16 = tmp_qloop_133*tmp_qloop_152; + const real_t q_tmp_1_17 = tmp_qloop_130*tmp_qloop_149; + const real_t q_tmp_1_18 = tmp_qloop_132*tmp_qloop_149; + const real_t q_tmp_1_19 = tmp_qloop_134*tmp_qloop_149; + const real_t q_tmp_1_20 = tmp_qloop_115*tmp_qloop_153; + const real_t q_tmp_1_21 = tmp_qloop_127*tmp_qloop_153; + const real_t q_tmp_1_22 = tmp_qloop_128*tmp_qloop_153; + const real_t q_tmp_1_23 = tmp_qloop_129*tmp_qloop_153; + const real_t q_tmp_1_24 = tmp_qloop_155; + const real_t q_tmp_1_25 = tmp_qloop_133*tmp_qloop_154; + const real_t q_tmp_1_26 = tmp_qloop_133*tmp_qloop_156; + const real_t q_tmp_1_27 = tmp_qloop_130*tmp_qloop_153; + const real_t q_tmp_1_28 = tmp_qloop_132*tmp_qloop_153; + const real_t q_tmp_1_29 = tmp_qloop_134*tmp_qloop_153; + const real_t q_tmp_2_0 = tmp_qloop_115*tmp_qloop_148; + const real_t q_tmp_2_1 = tmp_qloop_127*tmp_qloop_148; + const real_t q_tmp_2_2 = tmp_qloop_128*tmp_qloop_148; + const real_t q_tmp_2_3 = tmp_qloop_129*tmp_qloop_148; + const real_t q_tmp_2_4 = tmp_qloop_131*tmp_qloop_145; + const real_t q_tmp_2_5 = tmp_qloop_146; + const real_t q_tmp_2_6 = tmp_qloop_157*tmp_qloop_158; + const real_t q_tmp_2_7 = tmp_qloop_130*tmp_qloop_148; + const real_t q_tmp_2_8 = tmp_qloop_132*tmp_qloop_148; + const real_t q_tmp_2_9 = tmp_qloop_134*tmp_qloop_148; + const real_t q_tmp_2_10 = tmp_qloop_115*tmp_qloop_152; + const real_t q_tmp_2_11 = tmp_qloop_127*tmp_qloop_152; + const real_t q_tmp_2_12 = tmp_qloop_128*tmp_qloop_152; + const real_t q_tmp_2_13 = tmp_qloop_129*tmp_qloop_152; + const real_t q_tmp_2_14 = tmp_qloop_131*tmp_qloop_150; + const real_t q_tmp_2_15 = tmp_qloop_151; + const real_t q_tmp_2_16 = tmp_qloop_138*tmp_qloop_159; + const real_t q_tmp_2_17 = tmp_qloop_130*tmp_qloop_152; + const real_t q_tmp_2_18 = tmp_qloop_132*tmp_qloop_152; + const real_t q_tmp_2_19 = tmp_qloop_134*tmp_qloop_152; + const real_t q_tmp_2_20 = tmp_qloop_115*tmp_qloop_156; + const real_t q_tmp_2_21 = tmp_qloop_127*tmp_qloop_156; + const real_t q_tmp_2_22 = tmp_qloop_128*tmp_qloop_156; + const real_t q_tmp_2_23 = tmp_qloop_129*tmp_qloop_156; + const real_t q_tmp_2_24 = tmp_qloop_131*tmp_qloop_154; + const real_t q_tmp_2_25 = tmp_qloop_155; + const real_t q_tmp_2_26 = tmp_qloop_140*tmp_qloop_159; + const real_t q_tmp_2_27 = tmp_qloop_130*tmp_qloop_156; + const real_t q_tmp_2_28 = tmp_qloop_132*tmp_qloop_156; + const real_t q_tmp_2_29 = tmp_qloop_134*tmp_qloop_156; + const real_t q_tmp_3_0 = tmp_qloop_115*tmp_qloop_145; + const real_t q_tmp_3_1 = tmp_qloop_127*tmp_qloop_145; + const real_t q_tmp_3_2 = tmp_qloop_128*tmp_qloop_145; + const real_t q_tmp_3_3 = tmp_qloop_129*tmp_qloop_145; + const real_t q_tmp_3_4 = tmp_qloop_109*tmp_qloop_160; + const real_t q_tmp_3_5 = tmp_qloop_111*tmp_qloop_160; + const real_t q_tmp_3_6 = tmp_qloop_146; + const real_t q_tmp_3_7 = tmp_qloop_130*tmp_qloop_145; + const real_t q_tmp_3_8 = tmp_qloop_132*tmp_qloop_145; + const real_t q_tmp_3_9 = tmp_qloop_134*tmp_qloop_145; + const real_t q_tmp_3_10 = tmp_qloop_115*tmp_qloop_150; + const real_t q_tmp_3_11 = tmp_qloop_127*tmp_qloop_150; + const real_t q_tmp_3_12 = tmp_qloop_128*tmp_qloop_150; + const real_t q_tmp_3_13 = tmp_qloop_129*tmp_qloop_150; + const real_t q_tmp_3_14 = tmp_qloop_109*tmp_qloop_162; + const real_t q_tmp_3_15 = tmp_qloop_111*tmp_qloop_162; + const real_t q_tmp_3_16 = tmp_qloop_151; + const real_t q_tmp_3_17 = tmp_qloop_130*tmp_qloop_150; + const real_t q_tmp_3_18 = tmp_qloop_132*tmp_qloop_150; + const real_t q_tmp_3_19 = tmp_qloop_134*tmp_qloop_150; + const real_t q_tmp_3_20 = tmp_qloop_115*tmp_qloop_154; + const real_t q_tmp_3_21 = tmp_qloop_127*tmp_qloop_154; + const real_t q_tmp_3_22 = tmp_qloop_128*tmp_qloop_154; + const real_t q_tmp_3_23 = tmp_qloop_129*tmp_qloop_154; + const real_t q_tmp_3_24 = tmp_qloop_109*tmp_qloop_163; + const real_t q_tmp_3_25 = tmp_qloop_111*tmp_qloop_163; + const real_t q_tmp_3_26 = tmp_qloop_155; + const real_t q_tmp_3_27 = tmp_qloop_130*tmp_qloop_154; + const real_t q_tmp_3_28 = tmp_qloop_132*tmp_qloop_154; + const real_t q_tmp_3_29 = tmp_qloop_134*tmp_qloop_154; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_0_12 = q_acc_0_12 + q_tmp_0_12; + q_acc_0_13 = q_acc_0_13 + q_tmp_0_13; + q_acc_0_14 = q_acc_0_14 + q_tmp_0_14; + q_acc_0_15 = q_acc_0_15 + q_tmp_0_15; + q_acc_0_16 = q_acc_0_16 + q_tmp_0_16; + q_acc_0_17 = q_acc_0_17 + q_tmp_0_17; + q_acc_0_18 = q_acc_0_18 + q_tmp_0_18; + q_acc_0_19 = q_acc_0_19 + q_tmp_0_19; + q_acc_0_20 = q_acc_0_20 + q_tmp_0_20; + q_acc_0_21 = q_acc_0_21 + q_tmp_0_21; + q_acc_0_22 = q_acc_0_22 + q_tmp_0_22; + q_acc_0_23 = q_acc_0_23 + q_tmp_0_23; + q_acc_0_24 = q_acc_0_24 + q_tmp_0_24; + q_acc_0_25 = q_acc_0_25 + q_tmp_0_25; + q_acc_0_26 = q_acc_0_26 + q_tmp_0_26; + q_acc_0_27 = q_acc_0_27 + q_tmp_0_27; + q_acc_0_28 = q_acc_0_28 + q_tmp_0_28; + q_acc_0_29 = q_acc_0_29 + q_tmp_0_29; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_1_12 = q_acc_1_12 + q_tmp_1_12; + q_acc_1_13 = q_acc_1_13 + q_tmp_1_13; + q_acc_1_14 = q_acc_1_14 + q_tmp_1_14; + q_acc_1_15 = q_acc_1_15 + q_tmp_1_15; + q_acc_1_16 = q_acc_1_16 + q_tmp_1_16; + q_acc_1_17 = q_acc_1_17 + q_tmp_1_17; + q_acc_1_18 = q_acc_1_18 + q_tmp_1_18; + q_acc_1_19 = q_acc_1_19 + q_tmp_1_19; + q_acc_1_20 = q_acc_1_20 + q_tmp_1_20; + q_acc_1_21 = q_acc_1_21 + q_tmp_1_21; + q_acc_1_22 = q_acc_1_22 + q_tmp_1_22; + q_acc_1_23 = q_acc_1_23 + q_tmp_1_23; + q_acc_1_24 = q_acc_1_24 + q_tmp_1_24; + q_acc_1_25 = q_acc_1_25 + q_tmp_1_25; + q_acc_1_26 = q_acc_1_26 + q_tmp_1_26; + q_acc_1_27 = q_acc_1_27 + q_tmp_1_27; + q_acc_1_28 = q_acc_1_28 + q_tmp_1_28; + q_acc_1_29 = q_acc_1_29 + q_tmp_1_29; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + q_acc_2_12 = q_acc_2_12 + q_tmp_2_12; + q_acc_2_13 = q_acc_2_13 + q_tmp_2_13; + q_acc_2_14 = q_acc_2_14 + q_tmp_2_14; + q_acc_2_15 = q_acc_2_15 + q_tmp_2_15; + q_acc_2_16 = q_acc_2_16 + q_tmp_2_16; + q_acc_2_17 = q_acc_2_17 + q_tmp_2_17; + q_acc_2_18 = q_acc_2_18 + q_tmp_2_18; + q_acc_2_19 = q_acc_2_19 + q_tmp_2_19; + q_acc_2_20 = q_acc_2_20 + q_tmp_2_20; + q_acc_2_21 = q_acc_2_21 + q_tmp_2_21; + q_acc_2_22 = q_acc_2_22 + q_tmp_2_22; + q_acc_2_23 = q_acc_2_23 + q_tmp_2_23; + q_acc_2_24 = q_acc_2_24 + q_tmp_2_24; + q_acc_2_25 = q_acc_2_25 + q_tmp_2_25; + q_acc_2_26 = q_acc_2_26 + q_tmp_2_26; + q_acc_2_27 = q_acc_2_27 + q_tmp_2_27; + q_acc_2_28 = q_acc_2_28 + q_tmp_2_28; + q_acc_2_29 = q_acc_2_29 + q_tmp_2_29; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_3_6 = q_acc_3_6 + q_tmp_3_6; + q_acc_3_7 = q_acc_3_7 + q_tmp_3_7; + q_acc_3_8 = q_acc_3_8 + q_tmp_3_8; + q_acc_3_9 = q_acc_3_9 + q_tmp_3_9; + q_acc_3_10 = q_acc_3_10 + q_tmp_3_10; + q_acc_3_11 = q_acc_3_11 + q_tmp_3_11; + q_acc_3_12 = q_acc_3_12 + q_tmp_3_12; + q_acc_3_13 = q_acc_3_13 + q_tmp_3_13; + q_acc_3_14 = q_acc_3_14 + q_tmp_3_14; + q_acc_3_15 = q_acc_3_15 + q_tmp_3_15; + q_acc_3_16 = q_acc_3_16 + q_tmp_3_16; + q_acc_3_17 = q_acc_3_17 + q_tmp_3_17; + q_acc_3_18 = q_acc_3_18 + q_tmp_3_18; + q_acc_3_19 = q_acc_3_19 + q_tmp_3_19; + q_acc_3_20 = q_acc_3_20 + q_tmp_3_20; + q_acc_3_21 = q_acc_3_21 + q_tmp_3_21; + q_acc_3_22 = q_acc_3_22 + q_tmp_3_22; + q_acc_3_23 = q_acc_3_23 + q_tmp_3_23; + q_acc_3_24 = q_acc_3_24 + q_tmp_3_24; + q_acc_3_25 = q_acc_3_25 + q_tmp_3_25; + q_acc_3_26 = q_acc_3_26 + q_tmp_3_26; + q_acc_3_27 = q_acc_3_27 + q_tmp_3_27; + q_acc_3_28 = q_acc_3_28 + q_tmp_3_28; + q_acc_3_29 = q_acc_3_29 + q_tmp_3_29; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + } + } + } + const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN; + const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN; + const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN; + const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN; + const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN; + const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN; + const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN; + const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN; + const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN; + const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN; + const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN; + const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN; + const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN; + const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN; + const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN; + const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN; + const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN; + const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN; + const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN; + const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN; + const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN; + const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN; + const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN; + const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN; + const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN; + const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN; + const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_15_BLUE_DOWN = jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN; + const real_t tmp_coords_jac_16_BLUE_DOWN = 1.0 / (tmp_coords_jac_15_BLUE_DOWN); + const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN); + const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN); + const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN); + const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); + const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); + const real_t abs_det_jac_affine_BLUE_DOWN = abs(tmp_coords_jac_15_BLUE_DOWN); + { + /* CellType.BLUE_DOWN */ + for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) + for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 5; q += 1) + { + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_0); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_18); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_2),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_2),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_2),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_2); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_1),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_1); + const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_15,tmp_qloop_15,tmp_qloop_15,tmp_qloop_15))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_set_pd(tmp_qloop_3,tmp_qloop_3,tmp_qloop_3,tmp_qloop_3))),_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_25); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_22); + const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(tmp_qloop_19,tmp_qloop_29),tmp_qloop_30); + const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)); + const __m256d tmp_qloop_35 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_34); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_35),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_37 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31)); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)); + const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_43); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_47); + const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),tmp_qloop_48); + const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_49); + const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_47)); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_51); + const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53)),tmp_qloop_28); + const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_54)); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)); + const __m256d tmp_qloop_57 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_56); + const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_57)); + const __m256d tmp_qloop_66 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31),tmp_qloop_31)),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_66); + const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_73 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)); + const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)),tmp_qloop_27); + const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),tmp_qloop_74); + const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_76); + const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_77); + const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_43); + const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_83 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))); + const __m256d tmp_qloop_84 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_85 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_22); + const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_43); + const __m256d tmp_qloop_87 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)),_mm256_set_pd(tmp_qloop_32,tmp_qloop_32,tmp_qloop_32,tmp_qloop_32))); + const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_43); + const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_76); + const __m256d tmp_qloop_90 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_89); + const __m256d tmp_qloop_92 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_85,tmp_qloop_89); + const __m256d tmp_qloop_94 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)); + const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_76); + const __m256d tmp_qloop_96 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))); + const __m256d tmp_qloop_97 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_76); + const __m256d tmp_qloop_98 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))); + const __m256d tmp_qloop_99 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_97); + const __m256d tmp_qloop_100 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_99); + const __m256d tmp_qloop_101 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),tmp_qloop_74); + const __m256d tmp_qloop_102 = _mm256_mul_pd(tmp_qloop_58,tmp_qloop_76); + const __m256d tmp_qloop_103 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_104 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_105 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_106 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_107 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_108 = _mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_109 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_110 = _mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_111 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_112 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_113 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_114 = _mm256_add_pd(tmp_qloop_112,tmp_qloop_113); + const __m256d tmp_qloop_115 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_104),tmp_qloop_106),tmp_qloop_108),tmp_qloop_110),tmp_qloop_114); + const __m256d tmp_qloop_116 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_117 = _mm256_add_pd(tmp_qloop_109,tmp_qloop_116); + const __m256d tmp_qloop_118 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_109),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_119 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_111),tmp_qloop_117)); + const __m256d tmp_qloop_120 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_116),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_119); + const __m256d tmp_qloop_121 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_111)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_117,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_109)),tmp_qloop_118),tmp_qloop_120); + const __m256d tmp_qloop_122 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_111); + const __m256d tmp_qloop_123 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_111),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_124 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_109)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_116,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_111)),tmp_qloop_120),tmp_qloop_123); + const __m256d tmp_qloop_125 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_116)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_109)),_mm256_mul_pd(rho_dof_5,tmp_qloop_111)),tmp_qloop_118),tmp_qloop_119),tmp_qloop_123); + const __m256d tmp_qloop_127 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_104); + const __m256d tmp_qloop_128 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_106); + const __m256d tmp_qloop_129 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_108); + const __m256d tmp_qloop_130 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_113,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_116); + const __m256d tmp_qloop_131 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_132 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_112,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_131,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_109); + const __m256d tmp_qloop_133 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_134 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_133,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111); + const __m256d tmp_qloop_158 = _mm256_mul_pd(tmp_qloop_105,tmp_qloop_111); + const __m256d jac_blending_0_0 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_38); + const __m256d jac_blending_0_1 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_44); + const __m256d jac_blending_0_2 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_50); + const __m256d jac_blending_1_0 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_52); + const __m256d jac_blending_1_1 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_55); + const __m256d tmp_qloop_63 = _mm256_mul_pd(jac_blending_0_2,jac_blending_1_1); + const __m256d jac_blending_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50); + const __m256d tmp_qloop_60 = _mm256_mul_pd(jac_blending_0_1,jac_blending_1_2); + const __m256d jac_blending_2_0 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_52); + const __m256d jac_blending_2_1 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_44); + const __m256d tmp_qloop_61 = _mm256_mul_pd(jac_blending_1_2,jac_blending_2_1); + const __m256d jac_blending_2_2 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_58); + const __m256d tmp_qloop_59 = _mm256_mul_pd(jac_blending_1_1,jac_blending_2_2); + const __m256d tmp_qloop_62 = _mm256_mul_pd(jac_blending_0_1,jac_blending_2_2); + const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_2_0,tmp_qloop_60)),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_61),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_62),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_2_0,tmp_qloop_63),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),jac_blending_2_1)); + const __m256d tmp_qloop_65 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_64); + const __m256d abs_det_jac_blending = tmp_qloop_64; + const __m256d tmp_qloop_135 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_115),_mm256_mul_pd(rho_dof_1,tmp_qloop_127)),_mm256_mul_pd(rho_dof_2,tmp_qloop_128)),_mm256_mul_pd(rho_dof_3,tmp_qloop_129)),_mm256_mul_pd(rho_dof_4,tmp_qloop_110)),_mm256_mul_pd(rho_dof_5,tmp_qloop_113)),_mm256_mul_pd(rho_dof_6,tmp_qloop_112)),_mm256_mul_pd(rho_dof_7,tmp_qloop_130)),_mm256_mul_pd(rho_dof_8,tmp_qloop_132)),_mm256_mul_pd(rho_dof_9,tmp_qloop_134)))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE_DOWN,abs_det_jac_affine_BLUE_DOWN,abs_det_jac_affine_BLUE_DOWN,abs_det_jac_affine_BLUE_DOWN)); + const __m256d tmp_qloop_136 = _mm256_mul_pd(tmp_qloop_135,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_142 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_144 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_147 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_159 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_158); + const __m256d tmp_qloop_161 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_135); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_59)); + const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(jac_blending_0_2,jac_blending_2_1))); + const __m256d jac_blending_inv_0_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_60)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_2,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_2),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_1_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_2_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_1,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_126 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN))))); + const __m256d tmp_qloop_137 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_136); + const __m256d tmp_qloop_143 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_142); + const __m256d tmp_qloop_145 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_144); + const __m256d tmp_qloop_146 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_145); + const __m256d tmp_qloop_148 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_147); + const __m256d tmp_qloop_157 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_135); + const __m256d tmp_qloop_160 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_157); + const __m256d jac_blending_inv_2_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_138 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN))))); + const __m256d tmp_qloop_139 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_138); + const __m256d tmp_qloop_149 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_142); + const __m256d tmp_qloop_150 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_144); + const __m256d tmp_qloop_151 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_150); + const __m256d tmp_qloop_152 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_147); + const __m256d tmp_qloop_162 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_161); + const __m256d jac_blending_inv_2_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_140 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN))))); + const __m256d tmp_qloop_141 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_140); + const __m256d tmp_qloop_153 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_142); + const __m256d tmp_qloop_154 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_144); + const __m256d tmp_qloop_155 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_154); + const __m256d tmp_qloop_156 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_147); + const __m256d tmp_qloop_163 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_161); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33))),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),_mm256_mul_pd(tmp_qloop_35,tmp_qloop_69)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,tmp_qloop_69),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_71)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_22,tmp_qloop_71)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_81),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52); + const __m256d hessian_blending_2_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_83),tmp_qloop_84); + const __m256d hessian_blending_0_0_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_86),tmp_qloop_84); + const __m256d hessian_blending_2_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_86),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52); + const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_87,tmp_qloop_88),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_88,tmp_qloop_92)); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53))),_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))),_mm256_mul_pd(tmp_qloop_47,tmp_qloop_79)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_79))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_82)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_1_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_87),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_1_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_86,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d hessian_blending_0_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_88,tmp_qloop_96),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50); + const __m256d hessian_blending_1_2_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_98),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_2_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_85,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_18),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_2_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_96),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_98),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50); + const __m256d hessian_blending_2_2_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_99),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_25),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_18),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_25),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_79)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))),_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_82))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_137); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_137); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_137); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_137); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_137); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_137); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_137); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_137); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_137); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_137); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_139); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_139); + const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_139); + const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_139); + const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_139); + const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_139); + const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_139); + const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_139); + const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_139); + const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_139); + const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_141); + const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_141); + const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_141); + const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_141); + const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_141); + const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_141); + const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_141); + const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_141); + const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_141); + const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_141); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_143); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_143); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_143); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_143); + const __m256d q_tmp_1_4 = tmp_qloop_146; + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_145); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_148); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_143); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_143); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_143); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_149); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_149); + const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_149); + const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_149); + const __m256d q_tmp_1_14 = tmp_qloop_151; + const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_150); + const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_152); + const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_149); + const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_149); + const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_149); + const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_153); + const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_153); + const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_153); + const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_153); + const __m256d q_tmp_1_24 = tmp_qloop_155; + const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_154); + const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_156); + const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_153); + const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_153); + const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_153); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_148); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_148); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_148); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_148); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_145); + const __m256d q_tmp_2_5 = tmp_qloop_146; + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_157,tmp_qloop_158); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_148); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_148); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_148); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_152); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_152); + const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_152); + const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_152); + const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_150); + const __m256d q_tmp_2_15 = tmp_qloop_151; + const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_159); + const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_152); + const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_152); + const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_152); + const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_156); + const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_156); + const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_156); + const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_156); + const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_154); + const __m256d q_tmp_2_25 = tmp_qloop_155; + const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_159); + const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_156); + const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_156); + const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_156); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_145); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_145); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_145); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_145); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_160); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_160); + const __m256d q_tmp_3_6 = tmp_qloop_146; + const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_145); + const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_145); + const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_145); + const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_150); + const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_150); + const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_150); + const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_150); + const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_162); + const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_162); + const __m256d q_tmp_3_16 = tmp_qloop_151; + const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_150); + const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_150); + const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_150); + const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_154); + const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_154); + const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_154); + const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_154); + const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_163); + const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_163); + const __m256d q_tmp_3_26 = tmp_qloop_155; + const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_154); + const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_154); + const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_154); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12); + q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13); + q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14); + q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15); + q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16); + q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17); + q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18); + q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19); + q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20); + q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21); + q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22); + q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23); + q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24); + q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25); + q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26); + q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27); + q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28); + q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12); + q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13); + q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14); + q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15); + q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16); + q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17); + q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18); + q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19); + q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20); + q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21); + q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22); + q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23); + q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24); + q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25); + q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26); + q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27); + q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28); + q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12); + q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13); + q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14); + q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15); + q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16); + q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17); + q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18); + q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19); + q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20); + q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21); + q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22); + q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23); + q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24); + q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25); + q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26); + q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27); + q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28); + q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6); + q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7); + q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8); + q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9); + q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10); + q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11); + q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12); + q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13); + q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14); + q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15); + q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16); + q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17); + q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18); + q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19); + q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20); + q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21); + q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22); + q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23); + q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24); + q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25); + q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26); + q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27); + q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28); + q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_7 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_17 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_27 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_0_12 = 0.0; + real_t q_acc_0_13 = 0.0; + real_t q_acc_0_14 = 0.0; + real_t q_acc_0_15 = 0.0; + real_t q_acc_0_16 = 0.0; + real_t q_acc_0_17 = 0.0; + real_t q_acc_0_18 = 0.0; + real_t q_acc_0_19 = 0.0; + real_t q_acc_0_20 = 0.0; + real_t q_acc_0_21 = 0.0; + real_t q_acc_0_22 = 0.0; + real_t q_acc_0_23 = 0.0; + real_t q_acc_0_24 = 0.0; + real_t q_acc_0_25 = 0.0; + real_t q_acc_0_26 = 0.0; + real_t q_acc_0_27 = 0.0; + real_t q_acc_0_28 = 0.0; + real_t q_acc_0_29 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_1_12 = 0.0; + real_t q_acc_1_13 = 0.0; + real_t q_acc_1_14 = 0.0; + real_t q_acc_1_15 = 0.0; + real_t q_acc_1_16 = 0.0; + real_t q_acc_1_17 = 0.0; + real_t q_acc_1_18 = 0.0; + real_t q_acc_1_19 = 0.0; + real_t q_acc_1_20 = 0.0; + real_t q_acc_1_21 = 0.0; + real_t q_acc_1_22 = 0.0; + real_t q_acc_1_23 = 0.0; + real_t q_acc_1_24 = 0.0; + real_t q_acc_1_25 = 0.0; + real_t q_acc_1_26 = 0.0; + real_t q_acc_1_27 = 0.0; + real_t q_acc_1_28 = 0.0; + real_t q_acc_1_29 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + real_t q_acc_2_12 = 0.0; + real_t q_acc_2_13 = 0.0; + real_t q_acc_2_14 = 0.0; + real_t q_acc_2_15 = 0.0; + real_t q_acc_2_16 = 0.0; + real_t q_acc_2_17 = 0.0; + real_t q_acc_2_18 = 0.0; + real_t q_acc_2_19 = 0.0; + real_t q_acc_2_20 = 0.0; + real_t q_acc_2_21 = 0.0; + real_t q_acc_2_22 = 0.0; + real_t q_acc_2_23 = 0.0; + real_t q_acc_2_24 = 0.0; + real_t q_acc_2_25 = 0.0; + real_t q_acc_2_26 = 0.0; + real_t q_acc_2_27 = 0.0; + real_t q_acc_2_28 = 0.0; + real_t q_acc_2_29 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_3_6 = 0.0; + real_t q_acc_3_7 = 0.0; + real_t q_acc_3_8 = 0.0; + real_t q_acc_3_9 = 0.0; + real_t q_acc_3_10 = 0.0; + real_t q_acc_3_11 = 0.0; + real_t q_acc_3_12 = 0.0; + real_t q_acc_3_13 = 0.0; + real_t q_acc_3_14 = 0.0; + real_t q_acc_3_15 = 0.0; + real_t q_acc_3_16 = 0.0; + real_t q_acc_3_17 = 0.0; + real_t q_acc_3_18 = 0.0; + real_t q_acc_3_19 = 0.0; + real_t q_acc_3_20 = 0.0; + real_t q_acc_3_21 = 0.0; + real_t q_acc_3_22 = 0.0; + real_t q_acc_3_23 = 0.0; + real_t q_acc_3_24 = 0.0; + real_t q_acc_3_25 = 0.0; + real_t q_acc_3_26 = 0.0; + real_t q_acc_3_27 = 0.0; + real_t q_acc_3_28 = 0.0; + real_t q_acc_3_29 = 0.0; + for (int64_t q = 0; q < 5; q += 1) + { + const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; + const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); + const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; + const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; + const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; + const real_t tmp_qloop_25 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q] + (-p_affine_0_1 + p_affine_3_1)*_data_q_p_2[q]; + const real_t tmp_qloop_26 = -tmp_qloop_25 - tmp_qloop_8; + const real_t tmp_qloop_27 = tmp_qloop_21*(tmp_qloop_10*tmp_qloop_23*tmp_qloop_9 + tmp_qloop_13*tmp_qloop_24*tmp_qloop_6 - tmp_qloop_14*tmp_qloop_24 - tmp_qloop_15*tmp_qloop_26 + tmp_qloop_2*tmp_qloop_26*tmp_qloop_3 - tmp_qloop_23*tmp_qloop_7); + const real_t tmp_qloop_28 = tmp_qloop_20 - tmp_qloop_27; + const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); + const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); + const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; + const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); + const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); + const real_t tmp_qloop_38 = tmp_qloop_37*1.0; + const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; + const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; + const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; + const real_t tmp_qloop_47 = -tmp_qloop_28; + const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; + const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; + const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; + const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; + const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; + const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; + const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; + const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; + const real_t tmp_qloop_57 = tmp_qloop_28 + tmp_qloop_56; + const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; + const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; + const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; + const real_t tmp_qloop_69 = tmp_qloop_18*2.0; + const real_t tmp_qloop_70 = -tmp_qloop_41; + const real_t tmp_qloop_71 = tmp_qloop_35*2.0; + const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; + const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; + const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; + const real_t tmp_qloop_76 = tmp_qloop_17*tmp_qloop_66; + const real_t tmp_qloop_77 = tmp_qloop_51*tmp_qloop_76; + const real_t tmp_qloop_78 = tmp_qloop_25*tmp_qloop_77; + const real_t tmp_qloop_79 = tmp_qloop_25*2.0; + const real_t tmp_qloop_80 = tmp_qloop_18*tmp_qloop_40 + tmp_qloop_33*tmp_qloop_79; + const real_t tmp_qloop_81 = tmp_qloop_25*tmp_qloop_43; + const real_t tmp_qloop_82 = tmp_qloop_22*2.0; + const real_t tmp_qloop_83 = tmp_qloop_18*tmp_qloop_72 + tmp_qloop_33*tmp_qloop_82; + const real_t tmp_qloop_84 = -tmp_qloop_22*tmp_qloop_78; + const real_t tmp_qloop_85 = tmp_qloop_18*tmp_qloop_22; + const real_t tmp_qloop_86 = tmp_qloop_22*tmp_qloop_43; + const real_t tmp_qloop_87 = tmp_qloop_21*tmp_qloop_25*tmp_qloop_32 - tmp_qloop_40*tmp_qloop_69; + const real_t tmp_qloop_88 = tmp_qloop_18*tmp_qloop_43; + const real_t tmp_qloop_89 = tmp_qloop_42*tmp_qloop_76; + const real_t tmp_qloop_90 = -tmp_qloop_25*tmp_qloop_40 - tmp_qloop_28; + const real_t tmp_qloop_91 = tmp_qloop_25*tmp_qloop_89; + const real_t tmp_qloop_92 = tmp_qloop_25*tmp_qloop_72 - tmp_qloop_40*tmp_qloop_82; + const real_t tmp_qloop_93 = tmp_qloop_85*tmp_qloop_89; + const real_t tmp_qloop_94 = -tmp_qloop_31*tmp_qloop_68; + const real_t tmp_qloop_95 = tmp_qloop_55*tmp_qloop_76; + const real_t tmp_qloop_96 = tmp_qloop_22*tmp_qloop_68 + tmp_qloop_46*tmp_qloop_69; + const real_t tmp_qloop_97 = tmp_qloop_49*tmp_qloop_76; + const real_t tmp_qloop_98 = tmp_qloop_22*tmp_qloop_40 + tmp_qloop_46*tmp_qloop_79; + const real_t tmp_qloop_99 = tmp_qloop_25*tmp_qloop_97; + const real_t tmp_qloop_100 = tmp_qloop_18*tmp_qloop_99; + const real_t tmp_qloop_101 = tmp_qloop_22*tmp_qloop_72 + tmp_qloop_56*2.0 + tmp_qloop_74; + const real_t tmp_qloop_102 = tmp_qloop_58*tmp_qloop_76; + const real_t tmp_qloop_103 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_104 = tmp_qloop_103*2.0; + const real_t tmp_qloop_105 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_106 = tmp_qloop_105*2.0; + const real_t tmp_qloop_107 = (_data_q_p_2[q]*_data_q_p_2[q]); + const real_t tmp_qloop_108 = tmp_qloop_107*2.0; + const real_t tmp_qloop_109 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_110 = tmp_qloop_109*_data_q_p_2[q]; + const real_t tmp_qloop_111 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_112 = tmp_qloop_111*_data_q_p_1[q]; + const real_t tmp_qloop_113 = tmp_qloop_111*_data_q_p_2[q]; + const real_t tmp_qloop_114 = tmp_qloop_112 + tmp_qloop_113; + const real_t tmp_qloop_115 = tmp_qloop_104 + tmp_qloop_106 + tmp_qloop_108 + tmp_qloop_110 + tmp_qloop_114 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0; + const real_t tmp_qloop_116 = 4.0*_data_q_p_2[q]; + const real_t tmp_qloop_117 = tmp_qloop_109 + tmp_qloop_116; + const real_t tmp_qloop_118 = -rho_dof_8*tmp_qloop_109; + const real_t tmp_qloop_119 = rho_dof_0*(tmp_qloop_111 + tmp_qloop_117 - 3.0); + const real_t tmp_qloop_120 = -rho_dof_7*tmp_qloop_116 + tmp_qloop_119; + const real_t tmp_qloop_121 = rho_dof_1*(tmp_qloop_111 - 1.0) + rho_dof_5*tmp_qloop_116 + rho_dof_6*tmp_qloop_109 + rho_dof_9*(-tmp_qloop_117 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_118 + tmp_qloop_120; + const real_t tmp_qloop_122 = tmp_qloop_111 - 4.0; + const real_t tmp_qloop_123 = -rho_dof_9*tmp_qloop_111; + const real_t tmp_qloop_124 = rho_dof_2*(tmp_qloop_109 - 1.0) + rho_dof_4*tmp_qloop_116 + rho_dof_6*tmp_qloop_111 + rho_dof_8*(-tmp_qloop_116 - tmp_qloop_122 - 8.0*_data_q_p_1[q]) + tmp_qloop_120 + tmp_qloop_123; + const real_t tmp_qloop_125 = rho_dof_3*(tmp_qloop_116 - 1.0) + rho_dof_4*tmp_qloop_109 + rho_dof_5*tmp_qloop_111 + rho_dof_7*(-tmp_qloop_109 - tmp_qloop_122 - 8.0*_data_q_p_2[q]) + tmp_qloop_118 + tmp_qloop_119 + tmp_qloop_123; + const real_t tmp_qloop_127 = tmp_qloop_104 - _data_q_p_0[q]; + const real_t tmp_qloop_128 = tmp_qloop_106 - _data_q_p_1[q]; + const real_t tmp_qloop_129 = tmp_qloop_108 - _data_q_p_2[q]; + const real_t tmp_qloop_130 = tmp_qloop_107*-4.0 - tmp_qloop_110 - tmp_qloop_113 + tmp_qloop_116; + const real_t tmp_qloop_131 = tmp_qloop_105*4.0; + const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; + const real_t tmp_qloop_133 = tmp_qloop_103*4.0; + const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; + const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; + const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; + const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; + const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; + const real_t jac_blending_1_0 = tmp_qloop_25*tmp_qloop_52; + const real_t jac_blending_1_1 = tmp_qloop_43*tmp_qloop_55; + const real_t tmp_qloop_63 = jac_blending_0_2*jac_blending_1_1; + const real_t jac_blending_1_2 = tmp_qloop_25*tmp_qloop_50; + const real_t tmp_qloop_60 = jac_blending_0_1*jac_blending_1_2; + const real_t jac_blending_2_0 = tmp_qloop_22*tmp_qloop_52; + const real_t jac_blending_2_1 = tmp_qloop_22*tmp_qloop_44; + const real_t tmp_qloop_61 = jac_blending_1_2*jac_blending_2_1; + const real_t jac_blending_2_2 = tmp_qloop_43*tmp_qloop_58; + const real_t tmp_qloop_59 = jac_blending_1_1*jac_blending_2_2; + const real_t tmp_qloop_62 = jac_blending_0_1*jac_blending_2_2; + const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; + const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); + const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_BLUE_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; + const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); + const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); + const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); + const real_t jac_blending_inv_1_0 = tmp_qloop_65*(-jac_blending_1_0*jac_blending_2_2 + jac_blending_1_2*jac_blending_2_0); + const real_t jac_blending_inv_1_1 = tmp_qloop_65*(jac_blending_0_0*jac_blending_2_2 - jac_blending_0_2*jac_blending_2_0); + const real_t jac_blending_inv_1_2 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_1_2 + jac_blending_0_2*jac_blending_1_0); + const real_t jac_blending_inv_2_0 = tmp_qloop_65*(jac_blending_1_0*jac_blending_2_1 - jac_blending_1_1*jac_blending_2_0); + const real_t tmp_qloop_126 = tmp_qloop_121*(jac_affine_inv_0_0_BLUE_DOWN*jac_blending_inv_0_0 + jac_affine_inv_0_1_BLUE_DOWN*jac_blending_inv_1_0 + jac_affine_inv_0_2_BLUE_DOWN*jac_blending_inv_2_0) + tmp_qloop_124*(jac_affine_inv_1_0_BLUE_DOWN*jac_blending_inv_0_0 + jac_affine_inv_1_1_BLUE_DOWN*jac_blending_inv_1_0 + jac_affine_inv_1_2_BLUE_DOWN*jac_blending_inv_2_0) + tmp_qloop_125*(jac_affine_inv_2_0_BLUE_DOWN*jac_blending_inv_0_0 + jac_affine_inv_2_1_BLUE_DOWN*jac_blending_inv_1_0 + jac_affine_inv_2_2_BLUE_DOWN*jac_blending_inv_2_0); + const real_t tmp_qloop_137 = tmp_qloop_126*tmp_qloop_136; + const real_t tmp_qloop_143 = tmp_qloop_126*tmp_qloop_142; + const real_t tmp_qloop_145 = tmp_qloop_126*tmp_qloop_144; + const real_t tmp_qloop_146 = tmp_qloop_112*tmp_qloop_145; + const real_t tmp_qloop_148 = tmp_qloop_126*tmp_qloop_147; + const real_t tmp_qloop_157 = tmp_qloop_126*tmp_qloop_135; + const real_t tmp_qloop_160 = tmp_qloop_107*tmp_qloop_157; + const real_t jac_blending_inv_2_1 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_2_1 + jac_blending_0_1*jac_blending_2_0); + const real_t tmp_qloop_138 = tmp_qloop_121*(jac_affine_inv_0_0_BLUE_DOWN*jac_blending_inv_0_1 + jac_affine_inv_0_1_BLUE_DOWN*jac_blending_inv_1_1 + jac_affine_inv_0_2_BLUE_DOWN*jac_blending_inv_2_1) + tmp_qloop_124*(jac_affine_inv_1_0_BLUE_DOWN*jac_blending_inv_0_1 + jac_affine_inv_1_1_BLUE_DOWN*jac_blending_inv_1_1 + jac_affine_inv_1_2_BLUE_DOWN*jac_blending_inv_2_1) + tmp_qloop_125*(jac_affine_inv_2_0_BLUE_DOWN*jac_blending_inv_0_1 + jac_affine_inv_2_1_BLUE_DOWN*jac_blending_inv_1_1 + jac_affine_inv_2_2_BLUE_DOWN*jac_blending_inv_2_1); + const real_t tmp_qloop_139 = tmp_qloop_136*tmp_qloop_138; + const real_t tmp_qloop_149 = tmp_qloop_138*tmp_qloop_142; + const real_t tmp_qloop_150 = tmp_qloop_138*tmp_qloop_144; + const real_t tmp_qloop_151 = tmp_qloop_112*tmp_qloop_150; + const real_t tmp_qloop_152 = tmp_qloop_138*tmp_qloop_147; + const real_t tmp_qloop_162 = tmp_qloop_138*tmp_qloop_161; + const real_t jac_blending_inv_2_2 = tmp_qloop_65*(jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0); + const real_t tmp_qloop_140 = tmp_qloop_121*(jac_affine_inv_0_0_BLUE_DOWN*jac_blending_inv_0_2 + jac_affine_inv_0_1_BLUE_DOWN*jac_blending_inv_1_2 + jac_affine_inv_0_2_BLUE_DOWN*jac_blending_inv_2_2) + tmp_qloop_124*(jac_affine_inv_1_0_BLUE_DOWN*jac_blending_inv_0_2 + jac_affine_inv_1_1_BLUE_DOWN*jac_blending_inv_1_2 + jac_affine_inv_1_2_BLUE_DOWN*jac_blending_inv_2_2) + tmp_qloop_125*(jac_affine_inv_2_0_BLUE_DOWN*jac_blending_inv_0_2 + jac_affine_inv_2_1_BLUE_DOWN*jac_blending_inv_1_2 + jac_affine_inv_2_2_BLUE_DOWN*jac_blending_inv_2_2); + const real_t tmp_qloop_141 = tmp_qloop_136*tmp_qloop_140; + const real_t tmp_qloop_153 = tmp_qloop_140*tmp_qloop_142; + const real_t tmp_qloop_154 = tmp_qloop_140*tmp_qloop_144; + const real_t tmp_qloop_155 = tmp_qloop_112*tmp_qloop_154; + const real_t tmp_qloop_156 = tmp_qloop_140*tmp_qloop_147; + const real_t tmp_qloop_163 = tmp_qloop_140*tmp_qloop_161; + const real_t hessian_blending_0_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_68 - tmp_qloop_28*tmp_qloop_69 + tmp_qloop_31*(tmp_qloop_33 - tmp_qloop_68) + tmp_qloop_35*tmp_qloop_69)*1.0 - tmp_qloop_18*tmp_qloop_67; + const real_t hessian_blending_1_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_40 + tmp_qloop_25*tmp_qloop_71 + tmp_qloop_70)*1.0 - tmp_qloop_25*tmp_qloop_67; + const real_t hessian_blending_2_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_72 + tmp_qloop_22*tmp_qloop_71 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_67; + const real_t hessian_blending_0_0_1 = tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_18*tmp_qloop_78; + const real_t hessian_blending_1_0_1 = -tmp_qloop_29*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_80*tmp_qloop_81; + const real_t hessian_blending_2_0_1 = tmp_qloop_81*tmp_qloop_83 + tmp_qloop_84; + const real_t hessian_blending_0_0_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_77*tmp_qloop_85; + const real_t hessian_blending_1_0_2 = tmp_qloop_80*tmp_qloop_86 + tmp_qloop_84; + const real_t hessian_blending_2_0_2 = -tmp_qloop_30*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_83*tmp_qloop_86; + const real_t hessian_blending_0_1_0 = -tmp_qloop_19*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_87*tmp_qloop_88; + const real_t hessian_blending_1_1_0 = tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_18*tmp_qloop_91; + const real_t hessian_blending_2_1_0 = tmp_qloop_88*tmp_qloop_92 - tmp_qloop_93; + const real_t hessian_blending_0_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_68 + tmp_qloop_54*tmp_qloop_69 + tmp_qloop_94)*1.0 - tmp_qloop_18*tmp_qloop_95; + const real_t hessian_blending_1_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_40 + tmp_qloop_31*(-tmp_qloop_40 + tmp_qloop_53) + tmp_qloop_47*tmp_qloop_79 + tmp_qloop_54*tmp_qloop_79)*1.0 - tmp_qloop_25*tmp_qloop_95; + const real_t hessian_blending_2_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_72 + tmp_qloop_54*tmp_qloop_82 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_95; + const real_t hessian_blending_0_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_87*1.0 - tmp_qloop_93; + const real_t hessian_blending_1_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_22*tmp_qloop_91; + const real_t hessian_blending_2_1_2 = -tmp_qloop_30*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_86*tmp_qloop_92; + const real_t hessian_blending_0_2_0 = -tmp_qloop_19*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_88*tmp_qloop_96; + const real_t hessian_blending_1_2_0 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_98*1.0; + const real_t hessian_blending_2_2_0 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*1.0 - tmp_qloop_85*tmp_qloop_97; + const real_t hessian_blending_0_2_1 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_96*1.0; + const real_t hessian_blending_1_2_1 = -tmp_qloop_29*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_81*tmp_qloop_98; + const real_t hessian_blending_2_2_1 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*1.0 - tmp_qloop_22*tmp_qloop_99; + const real_t hessian_blending_0_2_2 = -tmp_qloop_102*tmp_qloop_18 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_68 + tmp_qloop_57*tmp_qloop_69 + tmp_qloop_94)*1.0; + const real_t hessian_blending_1_2_2 = -tmp_qloop_102*tmp_qloop_25 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_40 + tmp_qloop_57*tmp_qloop_79 + tmp_qloop_70)*1.0; + const real_t hessian_blending_2_2_2 = -tmp_qloop_102*tmp_qloop_22 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_72 + tmp_qloop_31*(tmp_qloop_46 - tmp_qloop_72) + tmp_qloop_48*2.0 + tmp_qloop_57*tmp_qloop_82)*1.0; + const real_t q_tmp_0_0 = tmp_qloop_115*tmp_qloop_137; + const real_t q_tmp_0_1 = tmp_qloop_127*tmp_qloop_137; + const real_t q_tmp_0_2 = tmp_qloop_128*tmp_qloop_137; + const real_t q_tmp_0_3 = tmp_qloop_129*tmp_qloop_137; + const real_t q_tmp_0_4 = tmp_qloop_110*tmp_qloop_137; + const real_t q_tmp_0_5 = tmp_qloop_113*tmp_qloop_137; + const real_t q_tmp_0_6 = tmp_qloop_112*tmp_qloop_137; + const real_t q_tmp_0_7 = tmp_qloop_130*tmp_qloop_137; + const real_t q_tmp_0_8 = tmp_qloop_132*tmp_qloop_137; + const real_t q_tmp_0_9 = tmp_qloop_134*tmp_qloop_137; + const real_t q_tmp_0_10 = tmp_qloop_115*tmp_qloop_139; + const real_t q_tmp_0_11 = tmp_qloop_127*tmp_qloop_139; + const real_t q_tmp_0_12 = tmp_qloop_128*tmp_qloop_139; + const real_t q_tmp_0_13 = tmp_qloop_129*tmp_qloop_139; + const real_t q_tmp_0_14 = tmp_qloop_110*tmp_qloop_139; + const real_t q_tmp_0_15 = tmp_qloop_113*tmp_qloop_139; + const real_t q_tmp_0_16 = tmp_qloop_112*tmp_qloop_139; + const real_t q_tmp_0_17 = tmp_qloop_130*tmp_qloop_139; + const real_t q_tmp_0_18 = tmp_qloop_132*tmp_qloop_139; + const real_t q_tmp_0_19 = tmp_qloop_134*tmp_qloop_139; + const real_t q_tmp_0_20 = tmp_qloop_115*tmp_qloop_141; + const real_t q_tmp_0_21 = tmp_qloop_127*tmp_qloop_141; + const real_t q_tmp_0_22 = tmp_qloop_128*tmp_qloop_141; + const real_t q_tmp_0_23 = tmp_qloop_129*tmp_qloop_141; + const real_t q_tmp_0_24 = tmp_qloop_110*tmp_qloop_141; + const real_t q_tmp_0_25 = tmp_qloop_113*tmp_qloop_141; + const real_t q_tmp_0_26 = tmp_qloop_112*tmp_qloop_141; + const real_t q_tmp_0_27 = tmp_qloop_130*tmp_qloop_141; + const real_t q_tmp_0_28 = tmp_qloop_132*tmp_qloop_141; + const real_t q_tmp_0_29 = tmp_qloop_134*tmp_qloop_141; + const real_t q_tmp_1_0 = tmp_qloop_115*tmp_qloop_143; + const real_t q_tmp_1_1 = tmp_qloop_127*tmp_qloop_143; + const real_t q_tmp_1_2 = tmp_qloop_128*tmp_qloop_143; + const real_t q_tmp_1_3 = tmp_qloop_129*tmp_qloop_143; + const real_t q_tmp_1_4 = tmp_qloop_146; + const real_t q_tmp_1_5 = tmp_qloop_133*tmp_qloop_145; + const real_t q_tmp_1_6 = tmp_qloop_133*tmp_qloop_148; + const real_t q_tmp_1_7 = tmp_qloop_130*tmp_qloop_143; + const real_t q_tmp_1_8 = tmp_qloop_132*tmp_qloop_143; + const real_t q_tmp_1_9 = tmp_qloop_134*tmp_qloop_143; + const real_t q_tmp_1_10 = tmp_qloop_115*tmp_qloop_149; + const real_t q_tmp_1_11 = tmp_qloop_127*tmp_qloop_149; + const real_t q_tmp_1_12 = tmp_qloop_128*tmp_qloop_149; + const real_t q_tmp_1_13 = tmp_qloop_129*tmp_qloop_149; + const real_t q_tmp_1_14 = tmp_qloop_151; + const real_t q_tmp_1_15 = tmp_qloop_133*tmp_qloop_150; + const real_t q_tmp_1_16 = tmp_qloop_133*tmp_qloop_152; + const real_t q_tmp_1_17 = tmp_qloop_130*tmp_qloop_149; + const real_t q_tmp_1_18 = tmp_qloop_132*tmp_qloop_149; + const real_t q_tmp_1_19 = tmp_qloop_134*tmp_qloop_149; + const real_t q_tmp_1_20 = tmp_qloop_115*tmp_qloop_153; + const real_t q_tmp_1_21 = tmp_qloop_127*tmp_qloop_153; + const real_t q_tmp_1_22 = tmp_qloop_128*tmp_qloop_153; + const real_t q_tmp_1_23 = tmp_qloop_129*tmp_qloop_153; + const real_t q_tmp_1_24 = tmp_qloop_155; + const real_t q_tmp_1_25 = tmp_qloop_133*tmp_qloop_154; + const real_t q_tmp_1_26 = tmp_qloop_133*tmp_qloop_156; + const real_t q_tmp_1_27 = tmp_qloop_130*tmp_qloop_153; + const real_t q_tmp_1_28 = tmp_qloop_132*tmp_qloop_153; + const real_t q_tmp_1_29 = tmp_qloop_134*tmp_qloop_153; + const real_t q_tmp_2_0 = tmp_qloop_115*tmp_qloop_148; + const real_t q_tmp_2_1 = tmp_qloop_127*tmp_qloop_148; + const real_t q_tmp_2_2 = tmp_qloop_128*tmp_qloop_148; + const real_t q_tmp_2_3 = tmp_qloop_129*tmp_qloop_148; + const real_t q_tmp_2_4 = tmp_qloop_131*tmp_qloop_145; + const real_t q_tmp_2_5 = tmp_qloop_146; + const real_t q_tmp_2_6 = tmp_qloop_157*tmp_qloop_158; + const real_t q_tmp_2_7 = tmp_qloop_130*tmp_qloop_148; + const real_t q_tmp_2_8 = tmp_qloop_132*tmp_qloop_148; + const real_t q_tmp_2_9 = tmp_qloop_134*tmp_qloop_148; + const real_t q_tmp_2_10 = tmp_qloop_115*tmp_qloop_152; + const real_t q_tmp_2_11 = tmp_qloop_127*tmp_qloop_152; + const real_t q_tmp_2_12 = tmp_qloop_128*tmp_qloop_152; + const real_t q_tmp_2_13 = tmp_qloop_129*tmp_qloop_152; + const real_t q_tmp_2_14 = tmp_qloop_131*tmp_qloop_150; + const real_t q_tmp_2_15 = tmp_qloop_151; + const real_t q_tmp_2_16 = tmp_qloop_138*tmp_qloop_159; + const real_t q_tmp_2_17 = tmp_qloop_130*tmp_qloop_152; + const real_t q_tmp_2_18 = tmp_qloop_132*tmp_qloop_152; + const real_t q_tmp_2_19 = tmp_qloop_134*tmp_qloop_152; + const real_t q_tmp_2_20 = tmp_qloop_115*tmp_qloop_156; + const real_t q_tmp_2_21 = tmp_qloop_127*tmp_qloop_156; + const real_t q_tmp_2_22 = tmp_qloop_128*tmp_qloop_156; + const real_t q_tmp_2_23 = tmp_qloop_129*tmp_qloop_156; + const real_t q_tmp_2_24 = tmp_qloop_131*tmp_qloop_154; + const real_t q_tmp_2_25 = tmp_qloop_155; + const real_t q_tmp_2_26 = tmp_qloop_140*tmp_qloop_159; + const real_t q_tmp_2_27 = tmp_qloop_130*tmp_qloop_156; + const real_t q_tmp_2_28 = tmp_qloop_132*tmp_qloop_156; + const real_t q_tmp_2_29 = tmp_qloop_134*tmp_qloop_156; + const real_t q_tmp_3_0 = tmp_qloop_115*tmp_qloop_145; + const real_t q_tmp_3_1 = tmp_qloop_127*tmp_qloop_145; + const real_t q_tmp_3_2 = tmp_qloop_128*tmp_qloop_145; + const real_t q_tmp_3_3 = tmp_qloop_129*tmp_qloop_145; + const real_t q_tmp_3_4 = tmp_qloop_109*tmp_qloop_160; + const real_t q_tmp_3_5 = tmp_qloop_111*tmp_qloop_160; + const real_t q_tmp_3_6 = tmp_qloop_146; + const real_t q_tmp_3_7 = tmp_qloop_130*tmp_qloop_145; + const real_t q_tmp_3_8 = tmp_qloop_132*tmp_qloop_145; + const real_t q_tmp_3_9 = tmp_qloop_134*tmp_qloop_145; + const real_t q_tmp_3_10 = tmp_qloop_115*tmp_qloop_150; + const real_t q_tmp_3_11 = tmp_qloop_127*tmp_qloop_150; + const real_t q_tmp_3_12 = tmp_qloop_128*tmp_qloop_150; + const real_t q_tmp_3_13 = tmp_qloop_129*tmp_qloop_150; + const real_t q_tmp_3_14 = tmp_qloop_109*tmp_qloop_162; + const real_t q_tmp_3_15 = tmp_qloop_111*tmp_qloop_162; + const real_t q_tmp_3_16 = tmp_qloop_151; + const real_t q_tmp_3_17 = tmp_qloop_130*tmp_qloop_150; + const real_t q_tmp_3_18 = tmp_qloop_132*tmp_qloop_150; + const real_t q_tmp_3_19 = tmp_qloop_134*tmp_qloop_150; + const real_t q_tmp_3_20 = tmp_qloop_115*tmp_qloop_154; + const real_t q_tmp_3_21 = tmp_qloop_127*tmp_qloop_154; + const real_t q_tmp_3_22 = tmp_qloop_128*tmp_qloop_154; + const real_t q_tmp_3_23 = tmp_qloop_129*tmp_qloop_154; + const real_t q_tmp_3_24 = tmp_qloop_109*tmp_qloop_163; + const real_t q_tmp_3_25 = tmp_qloop_111*tmp_qloop_163; + const real_t q_tmp_3_26 = tmp_qloop_155; + const real_t q_tmp_3_27 = tmp_qloop_130*tmp_qloop_154; + const real_t q_tmp_3_28 = tmp_qloop_132*tmp_qloop_154; + const real_t q_tmp_3_29 = tmp_qloop_134*tmp_qloop_154; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_0_12 = q_acc_0_12 + q_tmp_0_12; + q_acc_0_13 = q_acc_0_13 + q_tmp_0_13; + q_acc_0_14 = q_acc_0_14 + q_tmp_0_14; + q_acc_0_15 = q_acc_0_15 + q_tmp_0_15; + q_acc_0_16 = q_acc_0_16 + q_tmp_0_16; + q_acc_0_17 = q_acc_0_17 + q_tmp_0_17; + q_acc_0_18 = q_acc_0_18 + q_tmp_0_18; + q_acc_0_19 = q_acc_0_19 + q_tmp_0_19; + q_acc_0_20 = q_acc_0_20 + q_tmp_0_20; + q_acc_0_21 = q_acc_0_21 + q_tmp_0_21; + q_acc_0_22 = q_acc_0_22 + q_tmp_0_22; + q_acc_0_23 = q_acc_0_23 + q_tmp_0_23; + q_acc_0_24 = q_acc_0_24 + q_tmp_0_24; + q_acc_0_25 = q_acc_0_25 + q_tmp_0_25; + q_acc_0_26 = q_acc_0_26 + q_tmp_0_26; + q_acc_0_27 = q_acc_0_27 + q_tmp_0_27; + q_acc_0_28 = q_acc_0_28 + q_tmp_0_28; + q_acc_0_29 = q_acc_0_29 + q_tmp_0_29; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_1_12 = q_acc_1_12 + q_tmp_1_12; + q_acc_1_13 = q_acc_1_13 + q_tmp_1_13; + q_acc_1_14 = q_acc_1_14 + q_tmp_1_14; + q_acc_1_15 = q_acc_1_15 + q_tmp_1_15; + q_acc_1_16 = q_acc_1_16 + q_tmp_1_16; + q_acc_1_17 = q_acc_1_17 + q_tmp_1_17; + q_acc_1_18 = q_acc_1_18 + q_tmp_1_18; + q_acc_1_19 = q_acc_1_19 + q_tmp_1_19; + q_acc_1_20 = q_acc_1_20 + q_tmp_1_20; + q_acc_1_21 = q_acc_1_21 + q_tmp_1_21; + q_acc_1_22 = q_acc_1_22 + q_tmp_1_22; + q_acc_1_23 = q_acc_1_23 + q_tmp_1_23; + q_acc_1_24 = q_acc_1_24 + q_tmp_1_24; + q_acc_1_25 = q_acc_1_25 + q_tmp_1_25; + q_acc_1_26 = q_acc_1_26 + q_tmp_1_26; + q_acc_1_27 = q_acc_1_27 + q_tmp_1_27; + q_acc_1_28 = q_acc_1_28 + q_tmp_1_28; + q_acc_1_29 = q_acc_1_29 + q_tmp_1_29; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + q_acc_2_12 = q_acc_2_12 + q_tmp_2_12; + q_acc_2_13 = q_acc_2_13 + q_tmp_2_13; + q_acc_2_14 = q_acc_2_14 + q_tmp_2_14; + q_acc_2_15 = q_acc_2_15 + q_tmp_2_15; + q_acc_2_16 = q_acc_2_16 + q_tmp_2_16; + q_acc_2_17 = q_acc_2_17 + q_tmp_2_17; + q_acc_2_18 = q_acc_2_18 + q_tmp_2_18; + q_acc_2_19 = q_acc_2_19 + q_tmp_2_19; + q_acc_2_20 = q_acc_2_20 + q_tmp_2_20; + q_acc_2_21 = q_acc_2_21 + q_tmp_2_21; + q_acc_2_22 = q_acc_2_22 + q_tmp_2_22; + q_acc_2_23 = q_acc_2_23 + q_tmp_2_23; + q_acc_2_24 = q_acc_2_24 + q_tmp_2_24; + q_acc_2_25 = q_acc_2_25 + q_tmp_2_25; + q_acc_2_26 = q_acc_2_26 + q_tmp_2_26; + q_acc_2_27 = q_acc_2_27 + q_tmp_2_27; + q_acc_2_28 = q_acc_2_28 + q_tmp_2_28; + q_acc_2_29 = q_acc_2_29 + q_tmp_2_29; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_3_6 = q_acc_3_6 + q_tmp_3_6; + q_acc_3_7 = q_acc_3_7 + q_tmp_3_7; + q_acc_3_8 = q_acc_3_8 + q_tmp_3_8; + q_acc_3_9 = q_acc_3_9 + q_tmp_3_9; + q_acc_3_10 = q_acc_3_10 + q_tmp_3_10; + q_acc_3_11 = q_acc_3_11 + q_tmp_3_11; + q_acc_3_12 = q_acc_3_12 + q_tmp_3_12; + q_acc_3_13 = q_acc_3_13 + q_tmp_3_13; + q_acc_3_14 = q_acc_3_14 + q_tmp_3_14; + q_acc_3_15 = q_acc_3_15 + q_tmp_3_15; + q_acc_3_16 = q_acc_3_16 + q_tmp_3_16; + q_acc_3_17 = q_acc_3_17 + q_tmp_3_17; + q_acc_3_18 = q_acc_3_18 + q_tmp_3_18; + q_acc_3_19 = q_acc_3_19 + q_tmp_3_19; + q_acc_3_20 = q_acc_3_20 + q_tmp_3_20; + q_acc_3_21 = q_acc_3_21 + q_tmp_3_21; + q_acc_3_22 = q_acc_3_22 + q_tmp_3_22; + q_acc_3_23 = q_acc_3_23 + q_tmp_3_23; + q_acc_3_24 = q_acc_3_24 + q_tmp_3_24; + q_acc_3_25 = q_acc_3_25 + q_tmp_3_25; + q_acc_3_26 = q_acc_3_26 + q_tmp_3_26; + q_acc_3_27 = q_acc_3_27 + q_tmp_3_27; + q_acc_3_28 = q_acc_3_28 + q_tmp_3_28; + q_acc_3_29 = q_acc_3_29 + q_tmp_3_29; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_0 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_2 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + } + } + } + const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP; + const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP; + const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP; + const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP; + const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP; + const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP; + const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP; + const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP; + const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP; + const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP; + const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP; + const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP; + const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP; + const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP; + const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP; + const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP; + const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP; + const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP; + const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP; + const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP; + const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP; + const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP; + const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP; + const real_t tmp_coords_jac_12_GREEN_UP = jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP; + const real_t tmp_coords_jac_13_GREEN_UP = 1.0 / (tmp_coords_jac_12_GREEN_UP); + const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP); + const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP); + const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP); + const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP); + const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP); + const real_t abs_det_jac_affine_GREEN_UP = abs(tmp_coords_jac_12_GREEN_UP); + { + /* CellType.GREEN_UP */ + for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) + for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 5; q += 1) + { + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_0); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_18); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_2),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_2),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_2),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_2); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_1),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_1); + const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_15,tmp_qloop_15,tmp_qloop_15,tmp_qloop_15))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_set_pd(tmp_qloop_3,tmp_qloop_3,tmp_qloop_3,tmp_qloop_3))),_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_25); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_22); + const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(tmp_qloop_19,tmp_qloop_29),tmp_qloop_30); + const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)); + const __m256d tmp_qloop_35 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_34); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_35),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_37 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31)); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)); + const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_43); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_47); + const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),tmp_qloop_48); + const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_49); + const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_47)); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_51); + const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53)),tmp_qloop_28); + const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_54)); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)); + const __m256d tmp_qloop_57 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_56); + const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_57)); + const __m256d tmp_qloop_66 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31),tmp_qloop_31)),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_66); + const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_73 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)); + const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)),tmp_qloop_27); + const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),tmp_qloop_74); + const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_76); + const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_77); + const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_43); + const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_83 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))); + const __m256d tmp_qloop_84 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_85 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_22); + const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_43); + const __m256d tmp_qloop_87 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)),_mm256_set_pd(tmp_qloop_32,tmp_qloop_32,tmp_qloop_32,tmp_qloop_32))); + const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_43); + const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_76); + const __m256d tmp_qloop_90 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_89); + const __m256d tmp_qloop_92 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_85,tmp_qloop_89); + const __m256d tmp_qloop_94 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)); + const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_76); + const __m256d tmp_qloop_96 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))); + const __m256d tmp_qloop_97 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_76); + const __m256d tmp_qloop_98 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))); + const __m256d tmp_qloop_99 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_97); + const __m256d tmp_qloop_100 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_99); + const __m256d tmp_qloop_101 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),tmp_qloop_74); + const __m256d tmp_qloop_102 = _mm256_mul_pd(tmp_qloop_58,tmp_qloop_76); + const __m256d tmp_qloop_103 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_104 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_105 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_106 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_107 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_108 = _mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_109 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_110 = _mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_111 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_112 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_113 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_114 = _mm256_add_pd(tmp_qloop_112,tmp_qloop_113); + const __m256d tmp_qloop_115 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_104),tmp_qloop_106),tmp_qloop_108),tmp_qloop_110),tmp_qloop_114); + const __m256d tmp_qloop_116 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_117 = _mm256_add_pd(tmp_qloop_109,tmp_qloop_116); + const __m256d tmp_qloop_118 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_109),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_119 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_111),tmp_qloop_117)); + const __m256d tmp_qloop_120 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_116),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_119); + const __m256d tmp_qloop_121 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_111)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_117,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_109)),tmp_qloop_118),tmp_qloop_120); + const __m256d tmp_qloop_122 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_111); + const __m256d tmp_qloop_123 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_111),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_124 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_109)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_116,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_111)),tmp_qloop_120),tmp_qloop_123); + const __m256d tmp_qloop_125 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_116)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_109)),_mm256_mul_pd(rho_dof_5,tmp_qloop_111)),tmp_qloop_118),tmp_qloop_119),tmp_qloop_123); + const __m256d tmp_qloop_127 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_104); + const __m256d tmp_qloop_128 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_106); + const __m256d tmp_qloop_129 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_108); + const __m256d tmp_qloop_130 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_113,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_116); + const __m256d tmp_qloop_131 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_132 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_112,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_131,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_109); + const __m256d tmp_qloop_133 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_134 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_133,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111); + const __m256d tmp_qloop_158 = _mm256_mul_pd(tmp_qloop_105,tmp_qloop_111); + const __m256d jac_blending_0_0 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_38); + const __m256d jac_blending_0_1 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_44); + const __m256d jac_blending_0_2 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_50); + const __m256d jac_blending_1_0 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_52); + const __m256d jac_blending_1_1 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_55); + const __m256d tmp_qloop_63 = _mm256_mul_pd(jac_blending_0_2,jac_blending_1_1); + const __m256d jac_blending_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50); + const __m256d tmp_qloop_60 = _mm256_mul_pd(jac_blending_0_1,jac_blending_1_2); + const __m256d jac_blending_2_0 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_52); + const __m256d jac_blending_2_1 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_44); + const __m256d tmp_qloop_61 = _mm256_mul_pd(jac_blending_1_2,jac_blending_2_1); + const __m256d jac_blending_2_2 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_58); + const __m256d tmp_qloop_59 = _mm256_mul_pd(jac_blending_1_1,jac_blending_2_2); + const __m256d tmp_qloop_62 = _mm256_mul_pd(jac_blending_0_1,jac_blending_2_2); + const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_2_0,tmp_qloop_60)),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_61),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_62),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_2_0,tmp_qloop_63),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),jac_blending_2_1)); + const __m256d tmp_qloop_65 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_64); + const __m256d abs_det_jac_blending = tmp_qloop_64; + const __m256d tmp_qloop_135 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_115),_mm256_mul_pd(rho_dof_1,tmp_qloop_127)),_mm256_mul_pd(rho_dof_2,tmp_qloop_128)),_mm256_mul_pd(rho_dof_3,tmp_qloop_129)),_mm256_mul_pd(rho_dof_4,tmp_qloop_110)),_mm256_mul_pd(rho_dof_5,tmp_qloop_113)),_mm256_mul_pd(rho_dof_6,tmp_qloop_112)),_mm256_mul_pd(rho_dof_7,tmp_qloop_130)),_mm256_mul_pd(rho_dof_8,tmp_qloop_132)),_mm256_mul_pd(rho_dof_9,tmp_qloop_134)))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GREEN_UP,abs_det_jac_affine_GREEN_UP,abs_det_jac_affine_GREEN_UP,abs_det_jac_affine_GREEN_UP)); + const __m256d tmp_qloop_136 = _mm256_mul_pd(tmp_qloop_135,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_142 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_144 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_147 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_159 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_158); + const __m256d tmp_qloop_161 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_135); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_59)); + const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(jac_blending_0_2,jac_blending_2_1))); + const __m256d jac_blending_inv_0_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_60)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_2,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_2),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_1_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_2_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_1,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_126 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP))))); + const __m256d tmp_qloop_137 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_136); + const __m256d tmp_qloop_143 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_142); + const __m256d tmp_qloop_145 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_144); + const __m256d tmp_qloop_146 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_145); + const __m256d tmp_qloop_148 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_147); + const __m256d tmp_qloop_157 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_135); + const __m256d tmp_qloop_160 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_157); + const __m256d jac_blending_inv_2_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_138 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP))))); + const __m256d tmp_qloop_139 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_138); + const __m256d tmp_qloop_149 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_142); + const __m256d tmp_qloop_150 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_144); + const __m256d tmp_qloop_151 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_150); + const __m256d tmp_qloop_152 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_147); + const __m256d tmp_qloop_162 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_161); + const __m256d jac_blending_inv_2_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_140 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP))))); + const __m256d tmp_qloop_141 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_140); + const __m256d tmp_qloop_153 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_142); + const __m256d tmp_qloop_154 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_144); + const __m256d tmp_qloop_155 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_154); + const __m256d tmp_qloop_156 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_147); + const __m256d tmp_qloop_163 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_161); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33))),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),_mm256_mul_pd(tmp_qloop_35,tmp_qloop_69)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,tmp_qloop_69),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_71)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_22,tmp_qloop_71)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_81),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52); + const __m256d hessian_blending_2_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_83),tmp_qloop_84); + const __m256d hessian_blending_0_0_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_86),tmp_qloop_84); + const __m256d hessian_blending_2_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_86),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52); + const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_87,tmp_qloop_88),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_88,tmp_qloop_92)); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53))),_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))),_mm256_mul_pd(tmp_qloop_47,tmp_qloop_79)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_79))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_82)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_1_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_87),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_1_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_86,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d hessian_blending_0_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_88,tmp_qloop_96),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50); + const __m256d hessian_blending_1_2_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_98),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_2_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_85,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_18),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_2_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_96),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_98),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50); + const __m256d hessian_blending_2_2_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_99),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_25),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_18),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_25),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_79)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))),_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_82))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_137); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_137); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_137); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_137); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_137); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_137); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_137); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_137); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_137); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_137); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_139); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_139); + const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_139); + const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_139); + const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_139); + const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_139); + const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_139); + const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_139); + const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_139); + const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_139); + const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_141); + const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_141); + const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_141); + const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_141); + const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_141); + const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_141); + const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_141); + const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_141); + const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_141); + const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_141); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_143); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_143); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_143); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_143); + const __m256d q_tmp_1_4 = tmp_qloop_146; + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_145); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_148); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_143); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_143); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_143); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_149); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_149); + const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_149); + const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_149); + const __m256d q_tmp_1_14 = tmp_qloop_151; + const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_150); + const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_152); + const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_149); + const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_149); + const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_149); + const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_153); + const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_153); + const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_153); + const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_153); + const __m256d q_tmp_1_24 = tmp_qloop_155; + const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_154); + const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_156); + const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_153); + const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_153); + const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_153); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_148); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_148); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_148); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_148); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_145); + const __m256d q_tmp_2_5 = tmp_qloop_146; + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_157,tmp_qloop_158); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_148); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_148); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_148); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_152); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_152); + const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_152); + const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_152); + const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_150); + const __m256d q_tmp_2_15 = tmp_qloop_151; + const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_159); + const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_152); + const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_152); + const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_152); + const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_156); + const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_156); + const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_156); + const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_156); + const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_154); + const __m256d q_tmp_2_25 = tmp_qloop_155; + const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_159); + const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_156); + const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_156); + const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_156); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_145); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_145); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_145); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_145); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_160); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_160); + const __m256d q_tmp_3_6 = tmp_qloop_146; + const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_145); + const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_145); + const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_145); + const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_150); + const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_150); + const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_150); + const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_150); + const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_162); + const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_162); + const __m256d q_tmp_3_16 = tmp_qloop_151; + const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_150); + const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_150); + const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_150); + const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_154); + const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_154); + const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_154); + const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_154); + const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_163); + const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_163); + const __m256d q_tmp_3_26 = tmp_qloop_155; + const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_154); + const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_154); + const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_154); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12); + q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13); + q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14); + q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15); + q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16); + q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17); + q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18); + q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19); + q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20); + q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21); + q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22); + q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23); + q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24); + q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25); + q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26); + q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27); + q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28); + q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12); + q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13); + q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14); + q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15); + q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16); + q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17); + q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18); + q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19); + q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20); + q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21); + q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22); + q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23); + q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24); + q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25); + q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26); + q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27); + q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28); + q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12); + q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13); + q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14); + q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15); + q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16); + q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17); + q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18); + q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19); + q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20); + q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21); + q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22); + q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23); + q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24); + q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25); + q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26); + q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27); + q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28); + q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6); + q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7); + q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8); + q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9); + q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10); + q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11); + q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12); + q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13); + q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14); + q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15); + q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16); + q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17); + q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18); + q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19); + q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20); + q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21); + q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22); + q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23); + q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24); + q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25); + q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26); + q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27); + q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28); + q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_7 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_17 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_27 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_0_12 = 0.0; + real_t q_acc_0_13 = 0.0; + real_t q_acc_0_14 = 0.0; + real_t q_acc_0_15 = 0.0; + real_t q_acc_0_16 = 0.0; + real_t q_acc_0_17 = 0.0; + real_t q_acc_0_18 = 0.0; + real_t q_acc_0_19 = 0.0; + real_t q_acc_0_20 = 0.0; + real_t q_acc_0_21 = 0.0; + real_t q_acc_0_22 = 0.0; + real_t q_acc_0_23 = 0.0; + real_t q_acc_0_24 = 0.0; + real_t q_acc_0_25 = 0.0; + real_t q_acc_0_26 = 0.0; + real_t q_acc_0_27 = 0.0; + real_t q_acc_0_28 = 0.0; + real_t q_acc_0_29 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_1_12 = 0.0; + real_t q_acc_1_13 = 0.0; + real_t q_acc_1_14 = 0.0; + real_t q_acc_1_15 = 0.0; + real_t q_acc_1_16 = 0.0; + real_t q_acc_1_17 = 0.0; + real_t q_acc_1_18 = 0.0; + real_t q_acc_1_19 = 0.0; + real_t q_acc_1_20 = 0.0; + real_t q_acc_1_21 = 0.0; + real_t q_acc_1_22 = 0.0; + real_t q_acc_1_23 = 0.0; + real_t q_acc_1_24 = 0.0; + real_t q_acc_1_25 = 0.0; + real_t q_acc_1_26 = 0.0; + real_t q_acc_1_27 = 0.0; + real_t q_acc_1_28 = 0.0; + real_t q_acc_1_29 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + real_t q_acc_2_12 = 0.0; + real_t q_acc_2_13 = 0.0; + real_t q_acc_2_14 = 0.0; + real_t q_acc_2_15 = 0.0; + real_t q_acc_2_16 = 0.0; + real_t q_acc_2_17 = 0.0; + real_t q_acc_2_18 = 0.0; + real_t q_acc_2_19 = 0.0; + real_t q_acc_2_20 = 0.0; + real_t q_acc_2_21 = 0.0; + real_t q_acc_2_22 = 0.0; + real_t q_acc_2_23 = 0.0; + real_t q_acc_2_24 = 0.0; + real_t q_acc_2_25 = 0.0; + real_t q_acc_2_26 = 0.0; + real_t q_acc_2_27 = 0.0; + real_t q_acc_2_28 = 0.0; + real_t q_acc_2_29 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_3_6 = 0.0; + real_t q_acc_3_7 = 0.0; + real_t q_acc_3_8 = 0.0; + real_t q_acc_3_9 = 0.0; + real_t q_acc_3_10 = 0.0; + real_t q_acc_3_11 = 0.0; + real_t q_acc_3_12 = 0.0; + real_t q_acc_3_13 = 0.0; + real_t q_acc_3_14 = 0.0; + real_t q_acc_3_15 = 0.0; + real_t q_acc_3_16 = 0.0; + real_t q_acc_3_17 = 0.0; + real_t q_acc_3_18 = 0.0; + real_t q_acc_3_19 = 0.0; + real_t q_acc_3_20 = 0.0; + real_t q_acc_3_21 = 0.0; + real_t q_acc_3_22 = 0.0; + real_t q_acc_3_23 = 0.0; + real_t q_acc_3_24 = 0.0; + real_t q_acc_3_25 = 0.0; + real_t q_acc_3_26 = 0.0; + real_t q_acc_3_27 = 0.0; + real_t q_acc_3_28 = 0.0; + real_t q_acc_3_29 = 0.0; + for (int64_t q = 0; q < 5; q += 1) + { + const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; + const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); + const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; + const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; + const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; + const real_t tmp_qloop_25 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q] + (-p_affine_0_1 + p_affine_3_1)*_data_q_p_2[q]; + const real_t tmp_qloop_26 = -tmp_qloop_25 - tmp_qloop_8; + const real_t tmp_qloop_27 = tmp_qloop_21*(tmp_qloop_10*tmp_qloop_23*tmp_qloop_9 + tmp_qloop_13*tmp_qloop_24*tmp_qloop_6 - tmp_qloop_14*tmp_qloop_24 - tmp_qloop_15*tmp_qloop_26 + tmp_qloop_2*tmp_qloop_26*tmp_qloop_3 - tmp_qloop_23*tmp_qloop_7); + const real_t tmp_qloop_28 = tmp_qloop_20 - tmp_qloop_27; + const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); + const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); + const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; + const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); + const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); + const real_t tmp_qloop_38 = tmp_qloop_37*1.0; + const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; + const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; + const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; + const real_t tmp_qloop_47 = -tmp_qloop_28; + const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; + const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; + const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; + const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; + const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; + const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; + const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; + const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; + const real_t tmp_qloop_57 = tmp_qloop_28 + tmp_qloop_56; + const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; + const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; + const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; + const real_t tmp_qloop_69 = tmp_qloop_18*2.0; + const real_t tmp_qloop_70 = -tmp_qloop_41; + const real_t tmp_qloop_71 = tmp_qloop_35*2.0; + const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; + const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; + const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; + const real_t tmp_qloop_76 = tmp_qloop_17*tmp_qloop_66; + const real_t tmp_qloop_77 = tmp_qloop_51*tmp_qloop_76; + const real_t tmp_qloop_78 = tmp_qloop_25*tmp_qloop_77; + const real_t tmp_qloop_79 = tmp_qloop_25*2.0; + const real_t tmp_qloop_80 = tmp_qloop_18*tmp_qloop_40 + tmp_qloop_33*tmp_qloop_79; + const real_t tmp_qloop_81 = tmp_qloop_25*tmp_qloop_43; + const real_t tmp_qloop_82 = tmp_qloop_22*2.0; + const real_t tmp_qloop_83 = tmp_qloop_18*tmp_qloop_72 + tmp_qloop_33*tmp_qloop_82; + const real_t tmp_qloop_84 = -tmp_qloop_22*tmp_qloop_78; + const real_t tmp_qloop_85 = tmp_qloop_18*tmp_qloop_22; + const real_t tmp_qloop_86 = tmp_qloop_22*tmp_qloop_43; + const real_t tmp_qloop_87 = tmp_qloop_21*tmp_qloop_25*tmp_qloop_32 - tmp_qloop_40*tmp_qloop_69; + const real_t tmp_qloop_88 = tmp_qloop_18*tmp_qloop_43; + const real_t tmp_qloop_89 = tmp_qloop_42*tmp_qloop_76; + const real_t tmp_qloop_90 = -tmp_qloop_25*tmp_qloop_40 - tmp_qloop_28; + const real_t tmp_qloop_91 = tmp_qloop_25*tmp_qloop_89; + const real_t tmp_qloop_92 = tmp_qloop_25*tmp_qloop_72 - tmp_qloop_40*tmp_qloop_82; + const real_t tmp_qloop_93 = tmp_qloop_85*tmp_qloop_89; + const real_t tmp_qloop_94 = -tmp_qloop_31*tmp_qloop_68; + const real_t tmp_qloop_95 = tmp_qloop_55*tmp_qloop_76; + const real_t tmp_qloop_96 = tmp_qloop_22*tmp_qloop_68 + tmp_qloop_46*tmp_qloop_69; + const real_t tmp_qloop_97 = tmp_qloop_49*tmp_qloop_76; + const real_t tmp_qloop_98 = tmp_qloop_22*tmp_qloop_40 + tmp_qloop_46*tmp_qloop_79; + const real_t tmp_qloop_99 = tmp_qloop_25*tmp_qloop_97; + const real_t tmp_qloop_100 = tmp_qloop_18*tmp_qloop_99; + const real_t tmp_qloop_101 = tmp_qloop_22*tmp_qloop_72 + tmp_qloop_56*2.0 + tmp_qloop_74; + const real_t tmp_qloop_102 = tmp_qloop_58*tmp_qloop_76; + const real_t tmp_qloop_103 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_104 = tmp_qloop_103*2.0; + const real_t tmp_qloop_105 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_106 = tmp_qloop_105*2.0; + const real_t tmp_qloop_107 = (_data_q_p_2[q]*_data_q_p_2[q]); + const real_t tmp_qloop_108 = tmp_qloop_107*2.0; + const real_t tmp_qloop_109 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_110 = tmp_qloop_109*_data_q_p_2[q]; + const real_t tmp_qloop_111 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_112 = tmp_qloop_111*_data_q_p_1[q]; + const real_t tmp_qloop_113 = tmp_qloop_111*_data_q_p_2[q]; + const real_t tmp_qloop_114 = tmp_qloop_112 + tmp_qloop_113; + const real_t tmp_qloop_115 = tmp_qloop_104 + tmp_qloop_106 + tmp_qloop_108 + tmp_qloop_110 + tmp_qloop_114 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0; + const real_t tmp_qloop_116 = 4.0*_data_q_p_2[q]; + const real_t tmp_qloop_117 = tmp_qloop_109 + tmp_qloop_116; + const real_t tmp_qloop_118 = -rho_dof_8*tmp_qloop_109; + const real_t tmp_qloop_119 = rho_dof_0*(tmp_qloop_111 + tmp_qloop_117 - 3.0); + const real_t tmp_qloop_120 = -rho_dof_7*tmp_qloop_116 + tmp_qloop_119; + const real_t tmp_qloop_121 = rho_dof_1*(tmp_qloop_111 - 1.0) + rho_dof_5*tmp_qloop_116 + rho_dof_6*tmp_qloop_109 + rho_dof_9*(-tmp_qloop_117 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_118 + tmp_qloop_120; + const real_t tmp_qloop_122 = tmp_qloop_111 - 4.0; + const real_t tmp_qloop_123 = -rho_dof_9*tmp_qloop_111; + const real_t tmp_qloop_124 = rho_dof_2*(tmp_qloop_109 - 1.0) + rho_dof_4*tmp_qloop_116 + rho_dof_6*tmp_qloop_111 + rho_dof_8*(-tmp_qloop_116 - tmp_qloop_122 - 8.0*_data_q_p_1[q]) + tmp_qloop_120 + tmp_qloop_123; + const real_t tmp_qloop_125 = rho_dof_3*(tmp_qloop_116 - 1.0) + rho_dof_4*tmp_qloop_109 + rho_dof_5*tmp_qloop_111 + rho_dof_7*(-tmp_qloop_109 - tmp_qloop_122 - 8.0*_data_q_p_2[q]) + tmp_qloop_118 + tmp_qloop_119 + tmp_qloop_123; + const real_t tmp_qloop_127 = tmp_qloop_104 - _data_q_p_0[q]; + const real_t tmp_qloop_128 = tmp_qloop_106 - _data_q_p_1[q]; + const real_t tmp_qloop_129 = tmp_qloop_108 - _data_q_p_2[q]; + const real_t tmp_qloop_130 = tmp_qloop_107*-4.0 - tmp_qloop_110 - tmp_qloop_113 + tmp_qloop_116; + const real_t tmp_qloop_131 = tmp_qloop_105*4.0; + const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; + const real_t tmp_qloop_133 = tmp_qloop_103*4.0; + const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; + const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; + const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; + const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; + const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; + const real_t jac_blending_1_0 = tmp_qloop_25*tmp_qloop_52; + const real_t jac_blending_1_1 = tmp_qloop_43*tmp_qloop_55; + const real_t tmp_qloop_63 = jac_blending_0_2*jac_blending_1_1; + const real_t jac_blending_1_2 = tmp_qloop_25*tmp_qloop_50; + const real_t tmp_qloop_60 = jac_blending_0_1*jac_blending_1_2; + const real_t jac_blending_2_0 = tmp_qloop_22*tmp_qloop_52; + const real_t jac_blending_2_1 = tmp_qloop_22*tmp_qloop_44; + const real_t tmp_qloop_61 = jac_blending_1_2*jac_blending_2_1; + const real_t jac_blending_2_2 = tmp_qloop_43*tmp_qloop_58; + const real_t tmp_qloop_59 = jac_blending_1_1*jac_blending_2_2; + const real_t tmp_qloop_62 = jac_blending_0_1*jac_blending_2_2; + const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; + const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); + const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_GREEN_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; + const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); + const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); + const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); + const real_t jac_blending_inv_1_0 = tmp_qloop_65*(-jac_blending_1_0*jac_blending_2_2 + jac_blending_1_2*jac_blending_2_0); + const real_t jac_blending_inv_1_1 = tmp_qloop_65*(jac_blending_0_0*jac_blending_2_2 - jac_blending_0_2*jac_blending_2_0); + const real_t jac_blending_inv_1_2 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_1_2 + jac_blending_0_2*jac_blending_1_0); + const real_t jac_blending_inv_2_0 = tmp_qloop_65*(jac_blending_1_0*jac_blending_2_1 - jac_blending_1_1*jac_blending_2_0); + const real_t tmp_qloop_126 = tmp_qloop_121*(jac_affine_inv_0_0_GREEN_UP*jac_blending_inv_0_0 + jac_affine_inv_0_1_GREEN_UP*jac_blending_inv_1_0 + jac_affine_inv_0_2_GREEN_UP*jac_blending_inv_2_0) + tmp_qloop_124*(jac_affine_inv_1_0_GREEN_UP*jac_blending_inv_0_0 + jac_affine_inv_1_1_GREEN_UP*jac_blending_inv_1_0 + jac_affine_inv_1_2_GREEN_UP*jac_blending_inv_2_0) + tmp_qloop_125*(jac_affine_inv_2_0_GREEN_UP*jac_blending_inv_0_0 + jac_affine_inv_2_1_GREEN_UP*jac_blending_inv_1_0 + jac_affine_inv_2_2_GREEN_UP*jac_blending_inv_2_0); + const real_t tmp_qloop_137 = tmp_qloop_126*tmp_qloop_136; + const real_t tmp_qloop_143 = tmp_qloop_126*tmp_qloop_142; + const real_t tmp_qloop_145 = tmp_qloop_126*tmp_qloop_144; + const real_t tmp_qloop_146 = tmp_qloop_112*tmp_qloop_145; + const real_t tmp_qloop_148 = tmp_qloop_126*tmp_qloop_147; + const real_t tmp_qloop_157 = tmp_qloop_126*tmp_qloop_135; + const real_t tmp_qloop_160 = tmp_qloop_107*tmp_qloop_157; + const real_t jac_blending_inv_2_1 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_2_1 + jac_blending_0_1*jac_blending_2_0); + const real_t tmp_qloop_138 = tmp_qloop_121*(jac_affine_inv_0_0_GREEN_UP*jac_blending_inv_0_1 + jac_affine_inv_0_1_GREEN_UP*jac_blending_inv_1_1 + jac_affine_inv_0_2_GREEN_UP*jac_blending_inv_2_1) + tmp_qloop_124*(jac_affine_inv_1_0_GREEN_UP*jac_blending_inv_0_1 + jac_affine_inv_1_1_GREEN_UP*jac_blending_inv_1_1 + jac_affine_inv_1_2_GREEN_UP*jac_blending_inv_2_1) + tmp_qloop_125*(jac_affine_inv_2_0_GREEN_UP*jac_blending_inv_0_1 + jac_affine_inv_2_1_GREEN_UP*jac_blending_inv_1_1 + jac_affine_inv_2_2_GREEN_UP*jac_blending_inv_2_1); + const real_t tmp_qloop_139 = tmp_qloop_136*tmp_qloop_138; + const real_t tmp_qloop_149 = tmp_qloop_138*tmp_qloop_142; + const real_t tmp_qloop_150 = tmp_qloop_138*tmp_qloop_144; + const real_t tmp_qloop_151 = tmp_qloop_112*tmp_qloop_150; + const real_t tmp_qloop_152 = tmp_qloop_138*tmp_qloop_147; + const real_t tmp_qloop_162 = tmp_qloop_138*tmp_qloop_161; + const real_t jac_blending_inv_2_2 = tmp_qloop_65*(jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0); + const real_t tmp_qloop_140 = tmp_qloop_121*(jac_affine_inv_0_0_GREEN_UP*jac_blending_inv_0_2 + jac_affine_inv_0_1_GREEN_UP*jac_blending_inv_1_2 + jac_affine_inv_0_2_GREEN_UP*jac_blending_inv_2_2) + tmp_qloop_124*(jac_affine_inv_1_0_GREEN_UP*jac_blending_inv_0_2 + jac_affine_inv_1_1_GREEN_UP*jac_blending_inv_1_2 + jac_affine_inv_1_2_GREEN_UP*jac_blending_inv_2_2) + tmp_qloop_125*(jac_affine_inv_2_0_GREEN_UP*jac_blending_inv_0_2 + jac_affine_inv_2_1_GREEN_UP*jac_blending_inv_1_2 + jac_affine_inv_2_2_GREEN_UP*jac_blending_inv_2_2); + const real_t tmp_qloop_141 = tmp_qloop_136*tmp_qloop_140; + const real_t tmp_qloop_153 = tmp_qloop_140*tmp_qloop_142; + const real_t tmp_qloop_154 = tmp_qloop_140*tmp_qloop_144; + const real_t tmp_qloop_155 = tmp_qloop_112*tmp_qloop_154; + const real_t tmp_qloop_156 = tmp_qloop_140*tmp_qloop_147; + const real_t tmp_qloop_163 = tmp_qloop_140*tmp_qloop_161; + const real_t hessian_blending_0_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_68 - tmp_qloop_28*tmp_qloop_69 + tmp_qloop_31*(tmp_qloop_33 - tmp_qloop_68) + tmp_qloop_35*tmp_qloop_69)*1.0 - tmp_qloop_18*tmp_qloop_67; + const real_t hessian_blending_1_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_40 + tmp_qloop_25*tmp_qloop_71 + tmp_qloop_70)*1.0 - tmp_qloop_25*tmp_qloop_67; + const real_t hessian_blending_2_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_72 + tmp_qloop_22*tmp_qloop_71 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_67; + const real_t hessian_blending_0_0_1 = tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_18*tmp_qloop_78; + const real_t hessian_blending_1_0_1 = -tmp_qloop_29*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_80*tmp_qloop_81; + const real_t hessian_blending_2_0_1 = tmp_qloop_81*tmp_qloop_83 + tmp_qloop_84; + const real_t hessian_blending_0_0_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_77*tmp_qloop_85; + const real_t hessian_blending_1_0_2 = tmp_qloop_80*tmp_qloop_86 + tmp_qloop_84; + const real_t hessian_blending_2_0_2 = -tmp_qloop_30*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_83*tmp_qloop_86; + const real_t hessian_blending_0_1_0 = -tmp_qloop_19*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_87*tmp_qloop_88; + const real_t hessian_blending_1_1_0 = tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_18*tmp_qloop_91; + const real_t hessian_blending_2_1_0 = tmp_qloop_88*tmp_qloop_92 - tmp_qloop_93; + const real_t hessian_blending_0_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_68 + tmp_qloop_54*tmp_qloop_69 + tmp_qloop_94)*1.0 - tmp_qloop_18*tmp_qloop_95; + const real_t hessian_blending_1_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_40 + tmp_qloop_31*(-tmp_qloop_40 + tmp_qloop_53) + tmp_qloop_47*tmp_qloop_79 + tmp_qloop_54*tmp_qloop_79)*1.0 - tmp_qloop_25*tmp_qloop_95; + const real_t hessian_blending_2_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_72 + tmp_qloop_54*tmp_qloop_82 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_95; + const real_t hessian_blending_0_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_87*1.0 - tmp_qloop_93; + const real_t hessian_blending_1_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_22*tmp_qloop_91; + const real_t hessian_blending_2_1_2 = -tmp_qloop_30*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_86*tmp_qloop_92; + const real_t hessian_blending_0_2_0 = -tmp_qloop_19*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_88*tmp_qloop_96; + const real_t hessian_blending_1_2_0 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_98*1.0; + const real_t hessian_blending_2_2_0 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*1.0 - tmp_qloop_85*tmp_qloop_97; + const real_t hessian_blending_0_2_1 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_96*1.0; + const real_t hessian_blending_1_2_1 = -tmp_qloop_29*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_81*tmp_qloop_98; + const real_t hessian_blending_2_2_1 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*1.0 - tmp_qloop_22*tmp_qloop_99; + const real_t hessian_blending_0_2_2 = -tmp_qloop_102*tmp_qloop_18 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_68 + tmp_qloop_57*tmp_qloop_69 + tmp_qloop_94)*1.0; + const real_t hessian_blending_1_2_2 = -tmp_qloop_102*tmp_qloop_25 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_40 + tmp_qloop_57*tmp_qloop_79 + tmp_qloop_70)*1.0; + const real_t hessian_blending_2_2_2 = -tmp_qloop_102*tmp_qloop_22 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_72 + tmp_qloop_31*(tmp_qloop_46 - tmp_qloop_72) + tmp_qloop_48*2.0 + tmp_qloop_57*tmp_qloop_82)*1.0; + const real_t q_tmp_0_0 = tmp_qloop_115*tmp_qloop_137; + const real_t q_tmp_0_1 = tmp_qloop_127*tmp_qloop_137; + const real_t q_tmp_0_2 = tmp_qloop_128*tmp_qloop_137; + const real_t q_tmp_0_3 = tmp_qloop_129*tmp_qloop_137; + const real_t q_tmp_0_4 = tmp_qloop_110*tmp_qloop_137; + const real_t q_tmp_0_5 = tmp_qloop_113*tmp_qloop_137; + const real_t q_tmp_0_6 = tmp_qloop_112*tmp_qloop_137; + const real_t q_tmp_0_7 = tmp_qloop_130*tmp_qloop_137; + const real_t q_tmp_0_8 = tmp_qloop_132*tmp_qloop_137; + const real_t q_tmp_0_9 = tmp_qloop_134*tmp_qloop_137; + const real_t q_tmp_0_10 = tmp_qloop_115*tmp_qloop_139; + const real_t q_tmp_0_11 = tmp_qloop_127*tmp_qloop_139; + const real_t q_tmp_0_12 = tmp_qloop_128*tmp_qloop_139; + const real_t q_tmp_0_13 = tmp_qloop_129*tmp_qloop_139; + const real_t q_tmp_0_14 = tmp_qloop_110*tmp_qloop_139; + const real_t q_tmp_0_15 = tmp_qloop_113*tmp_qloop_139; + const real_t q_tmp_0_16 = tmp_qloop_112*tmp_qloop_139; + const real_t q_tmp_0_17 = tmp_qloop_130*tmp_qloop_139; + const real_t q_tmp_0_18 = tmp_qloop_132*tmp_qloop_139; + const real_t q_tmp_0_19 = tmp_qloop_134*tmp_qloop_139; + const real_t q_tmp_0_20 = tmp_qloop_115*tmp_qloop_141; + const real_t q_tmp_0_21 = tmp_qloop_127*tmp_qloop_141; + const real_t q_tmp_0_22 = tmp_qloop_128*tmp_qloop_141; + const real_t q_tmp_0_23 = tmp_qloop_129*tmp_qloop_141; + const real_t q_tmp_0_24 = tmp_qloop_110*tmp_qloop_141; + const real_t q_tmp_0_25 = tmp_qloop_113*tmp_qloop_141; + const real_t q_tmp_0_26 = tmp_qloop_112*tmp_qloop_141; + const real_t q_tmp_0_27 = tmp_qloop_130*tmp_qloop_141; + const real_t q_tmp_0_28 = tmp_qloop_132*tmp_qloop_141; + const real_t q_tmp_0_29 = tmp_qloop_134*tmp_qloop_141; + const real_t q_tmp_1_0 = tmp_qloop_115*tmp_qloop_143; + const real_t q_tmp_1_1 = tmp_qloop_127*tmp_qloop_143; + const real_t q_tmp_1_2 = tmp_qloop_128*tmp_qloop_143; + const real_t q_tmp_1_3 = tmp_qloop_129*tmp_qloop_143; + const real_t q_tmp_1_4 = tmp_qloop_146; + const real_t q_tmp_1_5 = tmp_qloop_133*tmp_qloop_145; + const real_t q_tmp_1_6 = tmp_qloop_133*tmp_qloop_148; + const real_t q_tmp_1_7 = tmp_qloop_130*tmp_qloop_143; + const real_t q_tmp_1_8 = tmp_qloop_132*tmp_qloop_143; + const real_t q_tmp_1_9 = tmp_qloop_134*tmp_qloop_143; + const real_t q_tmp_1_10 = tmp_qloop_115*tmp_qloop_149; + const real_t q_tmp_1_11 = tmp_qloop_127*tmp_qloop_149; + const real_t q_tmp_1_12 = tmp_qloop_128*tmp_qloop_149; + const real_t q_tmp_1_13 = tmp_qloop_129*tmp_qloop_149; + const real_t q_tmp_1_14 = tmp_qloop_151; + const real_t q_tmp_1_15 = tmp_qloop_133*tmp_qloop_150; + const real_t q_tmp_1_16 = tmp_qloop_133*tmp_qloop_152; + const real_t q_tmp_1_17 = tmp_qloop_130*tmp_qloop_149; + const real_t q_tmp_1_18 = tmp_qloop_132*tmp_qloop_149; + const real_t q_tmp_1_19 = tmp_qloop_134*tmp_qloop_149; + const real_t q_tmp_1_20 = tmp_qloop_115*tmp_qloop_153; + const real_t q_tmp_1_21 = tmp_qloop_127*tmp_qloop_153; + const real_t q_tmp_1_22 = tmp_qloop_128*tmp_qloop_153; + const real_t q_tmp_1_23 = tmp_qloop_129*tmp_qloop_153; + const real_t q_tmp_1_24 = tmp_qloop_155; + const real_t q_tmp_1_25 = tmp_qloop_133*tmp_qloop_154; + const real_t q_tmp_1_26 = tmp_qloop_133*tmp_qloop_156; + const real_t q_tmp_1_27 = tmp_qloop_130*tmp_qloop_153; + const real_t q_tmp_1_28 = tmp_qloop_132*tmp_qloop_153; + const real_t q_tmp_1_29 = tmp_qloop_134*tmp_qloop_153; + const real_t q_tmp_2_0 = tmp_qloop_115*tmp_qloop_148; + const real_t q_tmp_2_1 = tmp_qloop_127*tmp_qloop_148; + const real_t q_tmp_2_2 = tmp_qloop_128*tmp_qloop_148; + const real_t q_tmp_2_3 = tmp_qloop_129*tmp_qloop_148; + const real_t q_tmp_2_4 = tmp_qloop_131*tmp_qloop_145; + const real_t q_tmp_2_5 = tmp_qloop_146; + const real_t q_tmp_2_6 = tmp_qloop_157*tmp_qloop_158; + const real_t q_tmp_2_7 = tmp_qloop_130*tmp_qloop_148; + const real_t q_tmp_2_8 = tmp_qloop_132*tmp_qloop_148; + const real_t q_tmp_2_9 = tmp_qloop_134*tmp_qloop_148; + const real_t q_tmp_2_10 = tmp_qloop_115*tmp_qloop_152; + const real_t q_tmp_2_11 = tmp_qloop_127*tmp_qloop_152; + const real_t q_tmp_2_12 = tmp_qloop_128*tmp_qloop_152; + const real_t q_tmp_2_13 = tmp_qloop_129*tmp_qloop_152; + const real_t q_tmp_2_14 = tmp_qloop_131*tmp_qloop_150; + const real_t q_tmp_2_15 = tmp_qloop_151; + const real_t q_tmp_2_16 = tmp_qloop_138*tmp_qloop_159; + const real_t q_tmp_2_17 = tmp_qloop_130*tmp_qloop_152; + const real_t q_tmp_2_18 = tmp_qloop_132*tmp_qloop_152; + const real_t q_tmp_2_19 = tmp_qloop_134*tmp_qloop_152; + const real_t q_tmp_2_20 = tmp_qloop_115*tmp_qloop_156; + const real_t q_tmp_2_21 = tmp_qloop_127*tmp_qloop_156; + const real_t q_tmp_2_22 = tmp_qloop_128*tmp_qloop_156; + const real_t q_tmp_2_23 = tmp_qloop_129*tmp_qloop_156; + const real_t q_tmp_2_24 = tmp_qloop_131*tmp_qloop_154; + const real_t q_tmp_2_25 = tmp_qloop_155; + const real_t q_tmp_2_26 = tmp_qloop_140*tmp_qloop_159; + const real_t q_tmp_2_27 = tmp_qloop_130*tmp_qloop_156; + const real_t q_tmp_2_28 = tmp_qloop_132*tmp_qloop_156; + const real_t q_tmp_2_29 = tmp_qloop_134*tmp_qloop_156; + const real_t q_tmp_3_0 = tmp_qloop_115*tmp_qloop_145; + const real_t q_tmp_3_1 = tmp_qloop_127*tmp_qloop_145; + const real_t q_tmp_3_2 = tmp_qloop_128*tmp_qloop_145; + const real_t q_tmp_3_3 = tmp_qloop_129*tmp_qloop_145; + const real_t q_tmp_3_4 = tmp_qloop_109*tmp_qloop_160; + const real_t q_tmp_3_5 = tmp_qloop_111*tmp_qloop_160; + const real_t q_tmp_3_6 = tmp_qloop_146; + const real_t q_tmp_3_7 = tmp_qloop_130*tmp_qloop_145; + const real_t q_tmp_3_8 = tmp_qloop_132*tmp_qloop_145; + const real_t q_tmp_3_9 = tmp_qloop_134*tmp_qloop_145; + const real_t q_tmp_3_10 = tmp_qloop_115*tmp_qloop_150; + const real_t q_tmp_3_11 = tmp_qloop_127*tmp_qloop_150; + const real_t q_tmp_3_12 = tmp_qloop_128*tmp_qloop_150; + const real_t q_tmp_3_13 = tmp_qloop_129*tmp_qloop_150; + const real_t q_tmp_3_14 = tmp_qloop_109*tmp_qloop_162; + const real_t q_tmp_3_15 = tmp_qloop_111*tmp_qloop_162; + const real_t q_tmp_3_16 = tmp_qloop_151; + const real_t q_tmp_3_17 = tmp_qloop_130*tmp_qloop_150; + const real_t q_tmp_3_18 = tmp_qloop_132*tmp_qloop_150; + const real_t q_tmp_3_19 = tmp_qloop_134*tmp_qloop_150; + const real_t q_tmp_3_20 = tmp_qloop_115*tmp_qloop_154; + const real_t q_tmp_3_21 = tmp_qloop_127*tmp_qloop_154; + const real_t q_tmp_3_22 = tmp_qloop_128*tmp_qloop_154; + const real_t q_tmp_3_23 = tmp_qloop_129*tmp_qloop_154; + const real_t q_tmp_3_24 = tmp_qloop_109*tmp_qloop_163; + const real_t q_tmp_3_25 = tmp_qloop_111*tmp_qloop_163; + const real_t q_tmp_3_26 = tmp_qloop_155; + const real_t q_tmp_3_27 = tmp_qloop_130*tmp_qloop_154; + const real_t q_tmp_3_28 = tmp_qloop_132*tmp_qloop_154; + const real_t q_tmp_3_29 = tmp_qloop_134*tmp_qloop_154; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_0_12 = q_acc_0_12 + q_tmp_0_12; + q_acc_0_13 = q_acc_0_13 + q_tmp_0_13; + q_acc_0_14 = q_acc_0_14 + q_tmp_0_14; + q_acc_0_15 = q_acc_0_15 + q_tmp_0_15; + q_acc_0_16 = q_acc_0_16 + q_tmp_0_16; + q_acc_0_17 = q_acc_0_17 + q_tmp_0_17; + q_acc_0_18 = q_acc_0_18 + q_tmp_0_18; + q_acc_0_19 = q_acc_0_19 + q_tmp_0_19; + q_acc_0_20 = q_acc_0_20 + q_tmp_0_20; + q_acc_0_21 = q_acc_0_21 + q_tmp_0_21; + q_acc_0_22 = q_acc_0_22 + q_tmp_0_22; + q_acc_0_23 = q_acc_0_23 + q_tmp_0_23; + q_acc_0_24 = q_acc_0_24 + q_tmp_0_24; + q_acc_0_25 = q_acc_0_25 + q_tmp_0_25; + q_acc_0_26 = q_acc_0_26 + q_tmp_0_26; + q_acc_0_27 = q_acc_0_27 + q_tmp_0_27; + q_acc_0_28 = q_acc_0_28 + q_tmp_0_28; + q_acc_0_29 = q_acc_0_29 + q_tmp_0_29; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_1_12 = q_acc_1_12 + q_tmp_1_12; + q_acc_1_13 = q_acc_1_13 + q_tmp_1_13; + q_acc_1_14 = q_acc_1_14 + q_tmp_1_14; + q_acc_1_15 = q_acc_1_15 + q_tmp_1_15; + q_acc_1_16 = q_acc_1_16 + q_tmp_1_16; + q_acc_1_17 = q_acc_1_17 + q_tmp_1_17; + q_acc_1_18 = q_acc_1_18 + q_tmp_1_18; + q_acc_1_19 = q_acc_1_19 + q_tmp_1_19; + q_acc_1_20 = q_acc_1_20 + q_tmp_1_20; + q_acc_1_21 = q_acc_1_21 + q_tmp_1_21; + q_acc_1_22 = q_acc_1_22 + q_tmp_1_22; + q_acc_1_23 = q_acc_1_23 + q_tmp_1_23; + q_acc_1_24 = q_acc_1_24 + q_tmp_1_24; + q_acc_1_25 = q_acc_1_25 + q_tmp_1_25; + q_acc_1_26 = q_acc_1_26 + q_tmp_1_26; + q_acc_1_27 = q_acc_1_27 + q_tmp_1_27; + q_acc_1_28 = q_acc_1_28 + q_tmp_1_28; + q_acc_1_29 = q_acc_1_29 + q_tmp_1_29; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + q_acc_2_12 = q_acc_2_12 + q_tmp_2_12; + q_acc_2_13 = q_acc_2_13 + q_tmp_2_13; + q_acc_2_14 = q_acc_2_14 + q_tmp_2_14; + q_acc_2_15 = q_acc_2_15 + q_tmp_2_15; + q_acc_2_16 = q_acc_2_16 + q_tmp_2_16; + q_acc_2_17 = q_acc_2_17 + q_tmp_2_17; + q_acc_2_18 = q_acc_2_18 + q_tmp_2_18; + q_acc_2_19 = q_acc_2_19 + q_tmp_2_19; + q_acc_2_20 = q_acc_2_20 + q_tmp_2_20; + q_acc_2_21 = q_acc_2_21 + q_tmp_2_21; + q_acc_2_22 = q_acc_2_22 + q_tmp_2_22; + q_acc_2_23 = q_acc_2_23 + q_tmp_2_23; + q_acc_2_24 = q_acc_2_24 + q_tmp_2_24; + q_acc_2_25 = q_acc_2_25 + q_tmp_2_25; + q_acc_2_26 = q_acc_2_26 + q_tmp_2_26; + q_acc_2_27 = q_acc_2_27 + q_tmp_2_27; + q_acc_2_28 = q_acc_2_28 + q_tmp_2_28; + q_acc_2_29 = q_acc_2_29 + q_tmp_2_29; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_3_6 = q_acc_3_6 + q_tmp_3_6; + q_acc_3_7 = q_acc_3_7 + q_tmp_3_7; + q_acc_3_8 = q_acc_3_8 + q_tmp_3_8; + q_acc_3_9 = q_acc_3_9 + q_tmp_3_9; + q_acc_3_10 = q_acc_3_10 + q_tmp_3_10; + q_acc_3_11 = q_acc_3_11 + q_tmp_3_11; + q_acc_3_12 = q_acc_3_12 + q_tmp_3_12; + q_acc_3_13 = q_acc_3_13 + q_tmp_3_13; + q_acc_3_14 = q_acc_3_14 + q_tmp_3_14; + q_acc_3_15 = q_acc_3_15 + q_tmp_3_15; + q_acc_3_16 = q_acc_3_16 + q_tmp_3_16; + q_acc_3_17 = q_acc_3_17 + q_tmp_3_17; + q_acc_3_18 = q_acc_3_18 + q_tmp_3_18; + q_acc_3_19 = q_acc_3_19 + q_tmp_3_19; + q_acc_3_20 = q_acc_3_20 + q_tmp_3_20; + q_acc_3_21 = q_acc_3_21 + q_tmp_3_21; + q_acc_3_22 = q_acc_3_22 + q_tmp_3_22; + q_acc_3_23 = q_acc_3_23 + q_tmp_3_23; + q_acc_3_24 = q_acc_3_24 + q_tmp_3_24; + q_acc_3_25 = q_acc_3_25 + q_tmp_3_25; + q_acc_3_26 = q_acc_3_26 + q_tmp_3_26; + q_acc_3_27 = q_acc_3_27 + q_tmp_3_27; + q_acc_3_28 = q_acc_3_28 + q_tmp_3_28; + q_acc_3_29 = q_acc_3_29 + q_tmp_3_29; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_2 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + } + } + } + const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN; + const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN; + const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN; + const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN; + const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN; + const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN; + const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; + const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; + const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; + const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; + const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; + const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; + const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN; + const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN; + const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN; + const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN; + const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN; + const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN; + const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN; + const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN; + const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN; + const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN; + const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN; + const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_15_GREEN_DOWN = jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN; + const real_t tmp_coords_jac_16_GREEN_DOWN = 1.0 / (tmp_coords_jac_15_GREEN_DOWN); + const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN); + const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN); + const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN); + const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); + const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); + const real_t abs_det_jac_affine_GREEN_DOWN = abs(tmp_coords_jac_15_GREEN_DOWN); + { + /* CellType.GREEN_DOWN */ + for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) + for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 5; q += 1) + { + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_0); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_18); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_2),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_2),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_2,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_2),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_2); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_3_1),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),p_affine_0_1); + const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_14,tmp_qloop_14,tmp_qloop_14,tmp_qloop_14)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_15,tmp_qloop_15,tmp_qloop_15,tmp_qloop_15))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(tmp_qloop_13,tmp_qloop_13,tmp_qloop_13,tmp_qloop_13)),_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6))),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(tmp_qloop_2,tmp_qloop_2,tmp_qloop_2,tmp_qloop_2)),_mm256_set_pd(tmp_qloop_3,tmp_qloop_3,tmp_qloop_3,tmp_qloop_3))),_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_25); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_22); + const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(tmp_qloop_19,tmp_qloop_29),tmp_qloop_30); + const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)); + const __m256d tmp_qloop_35 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_34); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_35),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_37 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31)); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)); + const __m256d tmp_qloop_42 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_28),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_43); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_47); + const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),tmp_qloop_48); + const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_49); + const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_47)); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_51); + const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53)),tmp_qloop_28); + const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_54)); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)); + const __m256d tmp_qloop_57 = _mm256_add_pd(tmp_qloop_28,tmp_qloop_56); + const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_47),_mm256_mul_pd(tmp_qloop_31,tmp_qloop_57)); + const __m256d tmp_qloop_66 = _mm256_mul_pd(_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_31),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31),tmp_qloop_31)),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_66); + const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_73 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)); + const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)),tmp_qloop_27); + const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),tmp_qloop_74); + const __m256d tmp_qloop_76 = _mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)); + const __m256d tmp_qloop_77 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_76); + const __m256d tmp_qloop_78 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_77); + const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_43); + const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_83 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33)),_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))); + const __m256d tmp_qloop_84 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_85 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_22); + const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_43); + const __m256d tmp_qloop_87 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_21,tmp_qloop_21,tmp_qloop_21,tmp_qloop_21)),_mm256_set_pd(tmp_qloop_32,tmp_qloop_32,tmp_qloop_32,tmp_qloop_32))); + const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_43); + const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_76); + const __m256d tmp_qloop_90 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_89); + const __m256d tmp_qloop_92 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_82,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))); + const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_85,tmp_qloop_89); + const __m256d tmp_qloop_94 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)); + const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_76); + const __m256d tmp_qloop_96 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))); + const __m256d tmp_qloop_97 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_76); + const __m256d tmp_qloop_98 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_79,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))); + const __m256d tmp_qloop_99 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_97); + const __m256d tmp_qloop_100 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_99); + const __m256d tmp_qloop_101 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(2.0,2.0,2.0,2.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),tmp_qloop_74); + const __m256d tmp_qloop_102 = _mm256_mul_pd(tmp_qloop_58,tmp_qloop_76); + const __m256d tmp_qloop_103 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_104 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_105 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_106 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_107 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_108 = _mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_109 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_110 = _mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_111 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_112 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_113 = _mm256_mul_pd(tmp_qloop_111,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_114 = _mm256_add_pd(tmp_qloop_112,tmp_qloop_113); + const __m256d tmp_qloop_115 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_104),tmp_qloop_106),tmp_qloop_108),tmp_qloop_110),tmp_qloop_114); + const __m256d tmp_qloop_116 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_117 = _mm256_add_pd(tmp_qloop_109,tmp_qloop_116); + const __m256d tmp_qloop_118 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_109),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_119 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_111),tmp_qloop_117)); + const __m256d tmp_qloop_120 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_116),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_119); + const __m256d tmp_qloop_121 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_111)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_117,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_109)),tmp_qloop_118),tmp_qloop_120); + const __m256d tmp_qloop_122 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_111); + const __m256d tmp_qloop_123 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_111),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_124 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_109)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_116,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_116)),_mm256_mul_pd(rho_dof_6,tmp_qloop_111)),tmp_qloop_120),tmp_qloop_123); + const __m256d tmp_qloop_125 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_116)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_109,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_122,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_109)),_mm256_mul_pd(rho_dof_5,tmp_qloop_111)),tmp_qloop_118),tmp_qloop_119),tmp_qloop_123); + const __m256d tmp_qloop_127 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_104); + const __m256d tmp_qloop_128 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_106); + const __m256d tmp_qloop_129 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_108); + const __m256d tmp_qloop_130 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_113,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_116); + const __m256d tmp_qloop_131 = _mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_132 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_110,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_112,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_131,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_109); + const __m256d tmp_qloop_133 = _mm256_mul_pd(tmp_qloop_103,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_134 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_114,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_133,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_111); + const __m256d tmp_qloop_158 = _mm256_mul_pd(tmp_qloop_105,tmp_qloop_111); + const __m256d jac_blending_0_0 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_38); + const __m256d jac_blending_0_1 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_44); + const __m256d jac_blending_0_2 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_50); + const __m256d jac_blending_1_0 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_52); + const __m256d jac_blending_1_1 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_55); + const __m256d tmp_qloop_63 = _mm256_mul_pd(jac_blending_0_2,jac_blending_1_1); + const __m256d jac_blending_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50); + const __m256d tmp_qloop_60 = _mm256_mul_pd(jac_blending_0_1,jac_blending_1_2); + const __m256d jac_blending_2_0 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_52); + const __m256d jac_blending_2_1 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_44); + const __m256d tmp_qloop_61 = _mm256_mul_pd(jac_blending_1_2,jac_blending_2_1); + const __m256d jac_blending_2_2 = _mm256_mul_pd(tmp_qloop_43,tmp_qloop_58); + const __m256d tmp_qloop_59 = _mm256_mul_pd(jac_blending_1_1,jac_blending_2_2); + const __m256d tmp_qloop_62 = _mm256_mul_pd(jac_blending_0_1,jac_blending_2_2); + const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_2_0,tmp_qloop_60)),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,tmp_qloop_61),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_62),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_2_0,tmp_qloop_63),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),jac_blending_2_1)); + const __m256d tmp_qloop_65 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_64); + const __m256d abs_det_jac_blending = tmp_qloop_64; + const __m256d tmp_qloop_135 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_115),_mm256_mul_pd(rho_dof_1,tmp_qloop_127)),_mm256_mul_pd(rho_dof_2,tmp_qloop_128)),_mm256_mul_pd(rho_dof_3,tmp_qloop_129)),_mm256_mul_pd(rho_dof_4,tmp_qloop_110)),_mm256_mul_pd(rho_dof_5,tmp_qloop_113)),_mm256_mul_pd(rho_dof_6,tmp_qloop_112)),_mm256_mul_pd(rho_dof_7,tmp_qloop_130)),_mm256_mul_pd(rho_dof_8,tmp_qloop_132)),_mm256_mul_pd(rho_dof_9,tmp_qloop_134)))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GREEN_DOWN,abs_det_jac_affine_GREEN_DOWN,abs_det_jac_affine_GREEN_DOWN,abs_det_jac_affine_GREEN_DOWN)); + const __m256d tmp_qloop_136 = _mm256_mul_pd(tmp_qloop_135,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_142 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_144 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_147 = _mm256_mul_pd(tmp_qloop_135,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_159 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_158); + const __m256d tmp_qloop_161 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_135); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_59)); + const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(jac_blending_0_2,jac_blending_2_1))); + const __m256d jac_blending_inv_0_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_63,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_60)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_2,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_2),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_1_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_2,jac_blending_1_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_2),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d jac_blending_inv_2_0 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_1_0,jac_blending_2_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_1_1,jac_blending_2_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_126 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_0,_mm256_set_pd(jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN))))); + const __m256d tmp_qloop_137 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_136); + const __m256d tmp_qloop_143 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_142); + const __m256d tmp_qloop_145 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_144); + const __m256d tmp_qloop_146 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_145); + const __m256d tmp_qloop_148 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_147); + const __m256d tmp_qloop_157 = _mm256_mul_pd(tmp_qloop_126,tmp_qloop_135); + const __m256d tmp_qloop_160 = _mm256_mul_pd(tmp_qloop_107,tmp_qloop_157); + const __m256d jac_blending_inv_2_1 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_2_0),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_2_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_138 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_1,_mm256_set_pd(jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN))))); + const __m256d tmp_qloop_139 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_138); + const __m256d tmp_qloop_149 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_142); + const __m256d tmp_qloop_150 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_144); + const __m256d tmp_qloop_151 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_150); + const __m256d tmp_qloop_152 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_147); + const __m256d tmp_qloop_162 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_161); + const __m256d jac_blending_inv_2_2 = _mm256_mul_pd(tmp_qloop_65,_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))); + const __m256d tmp_qloop_140 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_121,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN)))),_mm256_mul_pd(tmp_qloop_124,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN))))),_mm256_mul_pd(tmp_qloop_125,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_2,_mm256_set_pd(jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN)),_mm256_mul_pd(jac_blending_inv_1_2,_mm256_set_pd(jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN))),_mm256_mul_pd(jac_blending_inv_2_2,_mm256_set_pd(jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN))))); + const __m256d tmp_qloop_141 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_140); + const __m256d tmp_qloop_153 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_142); + const __m256d tmp_qloop_154 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_144); + const __m256d tmp_qloop_155 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_154); + const __m256d tmp_qloop_156 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_147); + const __m256d tmp_qloop_163 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_161); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_set_pd(tmp_qloop_33,tmp_qloop_33,tmp_qloop_33,tmp_qloop_33))),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68))),_mm256_mul_pd(tmp_qloop_35,tmp_qloop_69)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_28,tmp_qloop_69),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_25,tmp_qloop_71)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_0_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_67),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_22,tmp_qloop_71)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_78),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_81),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52); + const __m256d hessian_blending_2_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_83),tmp_qloop_84); + const __m256d hessian_blending_0_0_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_77,tmp_qloop_85),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_75),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_0_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_80,tmp_qloop_86),tmp_qloop_84); + const __m256d hessian_blending_2_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_83,tmp_qloop_86),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_77),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_52); + const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_87,tmp_qloop_88),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_88,tmp_qloop_92)); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_set_pd(tmp_qloop_53,tmp_qloop_53,tmp_qloop_53,tmp_qloop_53))),_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40))),_mm256_mul_pd(tmp_qloop_47,tmp_qloop_79)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_79))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_95),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_82)),tmp_qloop_73)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_1_2 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_87),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_1_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_91),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_37),tmp_qloop_90),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_86,tmp_qloop_92),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_30,tmp_qloop_89),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d hessian_blending_0_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_88,tmp_qloop_96),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50); + const __m256d hessian_blending_1_2_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_18,tmp_qloop_37),tmp_qloop_98),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_2_0 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_85,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_18),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_2_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_100,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_25,tmp_qloop_37),tmp_qloop_96),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_81,tmp_qloop_98),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_29,tmp_qloop_97),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_50); + const __m256d hessian_blending_2_2_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_22,tmp_qloop_99),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_101,tmp_qloop_25),tmp_qloop_37),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_0_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_18),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_68,tmp_qloop_68,tmp_qloop_68,tmp_qloop_68)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_69)),tmp_qloop_94)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_1_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_25),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_79)),tmp_qloop_70)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d hessian_blending_2_2_2 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_102,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72)),_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))),_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(2.0,2.0,2.0,2.0))),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(tmp_qloop_72,tmp_qloop_72,tmp_qloop_72,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_57,tmp_qloop_82))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_137); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_137); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_137); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_137); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_137); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_137); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_137); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_137); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_137); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_137); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_139); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_139); + const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_139); + const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_139); + const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_139); + const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_139); + const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_139); + const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_139); + const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_139); + const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_139); + const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_141); + const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_141); + const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_141); + const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_141); + const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_141); + const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_113,tmp_qloop_141); + const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_112,tmp_qloop_141); + const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_141); + const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_141); + const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_141); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_143); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_143); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_143); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_143); + const __m256d q_tmp_1_4 = tmp_qloop_146; + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_145); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_148); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_143); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_143); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_143); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_149); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_149); + const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_149); + const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_149); + const __m256d q_tmp_1_14 = tmp_qloop_151; + const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_150); + const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_152); + const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_149); + const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_149); + const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_149); + const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_153); + const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_153); + const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_153); + const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_153); + const __m256d q_tmp_1_24 = tmp_qloop_155; + const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_154); + const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_156); + const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_153); + const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_153); + const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_153); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_148); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_148); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_148); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_148); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_145); + const __m256d q_tmp_2_5 = tmp_qloop_146; + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_157,tmp_qloop_158); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_148); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_148); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_148); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_152); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_152); + const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_152); + const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_152); + const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_150); + const __m256d q_tmp_2_15 = tmp_qloop_151; + const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_138,tmp_qloop_159); + const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_152); + const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_152); + const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_152); + const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_156); + const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_156); + const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_156); + const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_156); + const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_131,tmp_qloop_154); + const __m256d q_tmp_2_25 = tmp_qloop_155; + const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_140,tmp_qloop_159); + const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_156); + const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_156); + const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_156); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_145); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_145); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_145); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_145); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_160); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_160); + const __m256d q_tmp_3_6 = tmp_qloop_146; + const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_145); + const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_145); + const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_145); + const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_150); + const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_150); + const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_150); + const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_150); + const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_162); + const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_162); + const __m256d q_tmp_3_16 = tmp_qloop_151; + const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_150); + const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_150); + const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_150); + const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_115,tmp_qloop_154); + const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_127,tmp_qloop_154); + const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_128,tmp_qloop_154); + const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_129,tmp_qloop_154); + const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_109,tmp_qloop_163); + const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_111,tmp_qloop_163); + const __m256d q_tmp_3_26 = tmp_qloop_155; + const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_130,tmp_qloop_154); + const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_154); + const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_154); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12); + q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13); + q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14); + q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15); + q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16); + q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17); + q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18); + q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19); + q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20); + q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21); + q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22); + q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23); + q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24); + q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25); + q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26); + q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27); + q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28); + q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12); + q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13); + q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14); + q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15); + q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16); + q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17); + q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18); + q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19); + q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20); + q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21); + q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22); + q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23); + q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24); + q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25); + q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26); + q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27); + q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28); + q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12); + q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13); + q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14); + q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15); + q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16); + q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17); + q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18); + q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19); + q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20); + q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21); + q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22); + q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23); + q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24); + q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25); + q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26); + q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27); + q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28); + q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6); + q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7); + q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8); + q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9); + q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10); + q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11); + q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12); + q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13); + q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14); + q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15); + q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16); + q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17); + q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18); + q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19); + q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20); + q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21); + q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22); + q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23); + q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24); + q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25); + q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26); + q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27); + q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28); + q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_7 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_9 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_15 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_17 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_19 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_25 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_27 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_29 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_0_12 = 0.0; + real_t q_acc_0_13 = 0.0; + real_t q_acc_0_14 = 0.0; + real_t q_acc_0_15 = 0.0; + real_t q_acc_0_16 = 0.0; + real_t q_acc_0_17 = 0.0; + real_t q_acc_0_18 = 0.0; + real_t q_acc_0_19 = 0.0; + real_t q_acc_0_20 = 0.0; + real_t q_acc_0_21 = 0.0; + real_t q_acc_0_22 = 0.0; + real_t q_acc_0_23 = 0.0; + real_t q_acc_0_24 = 0.0; + real_t q_acc_0_25 = 0.0; + real_t q_acc_0_26 = 0.0; + real_t q_acc_0_27 = 0.0; + real_t q_acc_0_28 = 0.0; + real_t q_acc_0_29 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_1_12 = 0.0; + real_t q_acc_1_13 = 0.0; + real_t q_acc_1_14 = 0.0; + real_t q_acc_1_15 = 0.0; + real_t q_acc_1_16 = 0.0; + real_t q_acc_1_17 = 0.0; + real_t q_acc_1_18 = 0.0; + real_t q_acc_1_19 = 0.0; + real_t q_acc_1_20 = 0.0; + real_t q_acc_1_21 = 0.0; + real_t q_acc_1_22 = 0.0; + real_t q_acc_1_23 = 0.0; + real_t q_acc_1_24 = 0.0; + real_t q_acc_1_25 = 0.0; + real_t q_acc_1_26 = 0.0; + real_t q_acc_1_27 = 0.0; + real_t q_acc_1_28 = 0.0; + real_t q_acc_1_29 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + real_t q_acc_2_12 = 0.0; + real_t q_acc_2_13 = 0.0; + real_t q_acc_2_14 = 0.0; + real_t q_acc_2_15 = 0.0; + real_t q_acc_2_16 = 0.0; + real_t q_acc_2_17 = 0.0; + real_t q_acc_2_18 = 0.0; + real_t q_acc_2_19 = 0.0; + real_t q_acc_2_20 = 0.0; + real_t q_acc_2_21 = 0.0; + real_t q_acc_2_22 = 0.0; + real_t q_acc_2_23 = 0.0; + real_t q_acc_2_24 = 0.0; + real_t q_acc_2_25 = 0.0; + real_t q_acc_2_26 = 0.0; + real_t q_acc_2_27 = 0.0; + real_t q_acc_2_28 = 0.0; + real_t q_acc_2_29 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_3_6 = 0.0; + real_t q_acc_3_7 = 0.0; + real_t q_acc_3_8 = 0.0; + real_t q_acc_3_9 = 0.0; + real_t q_acc_3_10 = 0.0; + real_t q_acc_3_11 = 0.0; + real_t q_acc_3_12 = 0.0; + real_t q_acc_3_13 = 0.0; + real_t q_acc_3_14 = 0.0; + real_t q_acc_3_15 = 0.0; + real_t q_acc_3_16 = 0.0; + real_t q_acc_3_17 = 0.0; + real_t q_acc_3_18 = 0.0; + real_t q_acc_3_19 = 0.0; + real_t q_acc_3_20 = 0.0; + real_t q_acc_3_21 = 0.0; + real_t q_acc_3_22 = 0.0; + real_t q_acc_3_23 = 0.0; + real_t q_acc_3_24 = 0.0; + real_t q_acc_3_25 = 0.0; + real_t q_acc_3_26 = 0.0; + real_t q_acc_3_27 = 0.0; + real_t q_acc_3_28 = 0.0; + real_t q_acc_3_29 = 0.0; + for (int64_t q = 0; q < 5; q += 1) + { + const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; + const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); + const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; + const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; + const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; + const real_t tmp_qloop_25 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q] + (-p_affine_0_1 + p_affine_3_1)*_data_q_p_2[q]; + const real_t tmp_qloop_26 = -tmp_qloop_25 - tmp_qloop_8; + const real_t tmp_qloop_27 = tmp_qloop_21*(tmp_qloop_10*tmp_qloop_23*tmp_qloop_9 + tmp_qloop_13*tmp_qloop_24*tmp_qloop_6 - tmp_qloop_14*tmp_qloop_24 - tmp_qloop_15*tmp_qloop_26 + tmp_qloop_2*tmp_qloop_26*tmp_qloop_3 - tmp_qloop_23*tmp_qloop_7); + const real_t tmp_qloop_28 = tmp_qloop_20 - tmp_qloop_27; + const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); + const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); + const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; + const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); + const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); + const real_t tmp_qloop_38 = tmp_qloop_37*1.0; + const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; + const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; + const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; + const real_t tmp_qloop_47 = -tmp_qloop_28; + const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; + const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; + const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; + const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; + const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; + const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; + const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; + const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; + const real_t tmp_qloop_57 = tmp_qloop_28 + tmp_qloop_56; + const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; + const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; + const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; + const real_t tmp_qloop_69 = tmp_qloop_18*2.0; + const real_t tmp_qloop_70 = -tmp_qloop_41; + const real_t tmp_qloop_71 = tmp_qloop_35*2.0; + const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; + const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; + const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; + const real_t tmp_qloop_76 = tmp_qloop_17*tmp_qloop_66; + const real_t tmp_qloop_77 = tmp_qloop_51*tmp_qloop_76; + const real_t tmp_qloop_78 = tmp_qloop_25*tmp_qloop_77; + const real_t tmp_qloop_79 = tmp_qloop_25*2.0; + const real_t tmp_qloop_80 = tmp_qloop_18*tmp_qloop_40 + tmp_qloop_33*tmp_qloop_79; + const real_t tmp_qloop_81 = tmp_qloop_25*tmp_qloop_43; + const real_t tmp_qloop_82 = tmp_qloop_22*2.0; + const real_t tmp_qloop_83 = tmp_qloop_18*tmp_qloop_72 + tmp_qloop_33*tmp_qloop_82; + const real_t tmp_qloop_84 = -tmp_qloop_22*tmp_qloop_78; + const real_t tmp_qloop_85 = tmp_qloop_18*tmp_qloop_22; + const real_t tmp_qloop_86 = tmp_qloop_22*tmp_qloop_43; + const real_t tmp_qloop_87 = tmp_qloop_21*tmp_qloop_25*tmp_qloop_32 - tmp_qloop_40*tmp_qloop_69; + const real_t tmp_qloop_88 = tmp_qloop_18*tmp_qloop_43; + const real_t tmp_qloop_89 = tmp_qloop_42*tmp_qloop_76; + const real_t tmp_qloop_90 = -tmp_qloop_25*tmp_qloop_40 - tmp_qloop_28; + const real_t tmp_qloop_91 = tmp_qloop_25*tmp_qloop_89; + const real_t tmp_qloop_92 = tmp_qloop_25*tmp_qloop_72 - tmp_qloop_40*tmp_qloop_82; + const real_t tmp_qloop_93 = tmp_qloop_85*tmp_qloop_89; + const real_t tmp_qloop_94 = -tmp_qloop_31*tmp_qloop_68; + const real_t tmp_qloop_95 = tmp_qloop_55*tmp_qloop_76; + const real_t tmp_qloop_96 = tmp_qloop_22*tmp_qloop_68 + tmp_qloop_46*tmp_qloop_69; + const real_t tmp_qloop_97 = tmp_qloop_49*tmp_qloop_76; + const real_t tmp_qloop_98 = tmp_qloop_22*tmp_qloop_40 + tmp_qloop_46*tmp_qloop_79; + const real_t tmp_qloop_99 = tmp_qloop_25*tmp_qloop_97; + const real_t tmp_qloop_100 = tmp_qloop_18*tmp_qloop_99; + const real_t tmp_qloop_101 = tmp_qloop_22*tmp_qloop_72 + tmp_qloop_56*2.0 + tmp_qloop_74; + const real_t tmp_qloop_102 = tmp_qloop_58*tmp_qloop_76; + const real_t tmp_qloop_103 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_104 = tmp_qloop_103*2.0; + const real_t tmp_qloop_105 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_106 = tmp_qloop_105*2.0; + const real_t tmp_qloop_107 = (_data_q_p_2[q]*_data_q_p_2[q]); + const real_t tmp_qloop_108 = tmp_qloop_107*2.0; + const real_t tmp_qloop_109 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_110 = tmp_qloop_109*_data_q_p_2[q]; + const real_t tmp_qloop_111 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_112 = tmp_qloop_111*_data_q_p_1[q]; + const real_t tmp_qloop_113 = tmp_qloop_111*_data_q_p_2[q]; + const real_t tmp_qloop_114 = tmp_qloop_112 + tmp_qloop_113; + const real_t tmp_qloop_115 = tmp_qloop_104 + tmp_qloop_106 + tmp_qloop_108 + tmp_qloop_110 + tmp_qloop_114 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0; + const real_t tmp_qloop_116 = 4.0*_data_q_p_2[q]; + const real_t tmp_qloop_117 = tmp_qloop_109 + tmp_qloop_116; + const real_t tmp_qloop_118 = -rho_dof_8*tmp_qloop_109; + const real_t tmp_qloop_119 = rho_dof_0*(tmp_qloop_111 + tmp_qloop_117 - 3.0); + const real_t tmp_qloop_120 = -rho_dof_7*tmp_qloop_116 + tmp_qloop_119; + const real_t tmp_qloop_121 = rho_dof_1*(tmp_qloop_111 - 1.0) + rho_dof_5*tmp_qloop_116 + rho_dof_6*tmp_qloop_109 + rho_dof_9*(-tmp_qloop_117 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_118 + tmp_qloop_120; + const real_t tmp_qloop_122 = tmp_qloop_111 - 4.0; + const real_t tmp_qloop_123 = -rho_dof_9*tmp_qloop_111; + const real_t tmp_qloop_124 = rho_dof_2*(tmp_qloop_109 - 1.0) + rho_dof_4*tmp_qloop_116 + rho_dof_6*tmp_qloop_111 + rho_dof_8*(-tmp_qloop_116 - tmp_qloop_122 - 8.0*_data_q_p_1[q]) + tmp_qloop_120 + tmp_qloop_123; + const real_t tmp_qloop_125 = rho_dof_3*(tmp_qloop_116 - 1.0) + rho_dof_4*tmp_qloop_109 + rho_dof_5*tmp_qloop_111 + rho_dof_7*(-tmp_qloop_109 - tmp_qloop_122 - 8.0*_data_q_p_2[q]) + tmp_qloop_118 + tmp_qloop_119 + tmp_qloop_123; + const real_t tmp_qloop_127 = tmp_qloop_104 - _data_q_p_0[q]; + const real_t tmp_qloop_128 = tmp_qloop_106 - _data_q_p_1[q]; + const real_t tmp_qloop_129 = tmp_qloop_108 - _data_q_p_2[q]; + const real_t tmp_qloop_130 = tmp_qloop_107*-4.0 - tmp_qloop_110 - tmp_qloop_113 + tmp_qloop_116; + const real_t tmp_qloop_131 = tmp_qloop_105*4.0; + const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; + const real_t tmp_qloop_133 = tmp_qloop_103*4.0; + const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; + const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; + const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; + const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; + const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; + const real_t jac_blending_1_0 = tmp_qloop_25*tmp_qloop_52; + const real_t jac_blending_1_1 = tmp_qloop_43*tmp_qloop_55; + const real_t tmp_qloop_63 = jac_blending_0_2*jac_blending_1_1; + const real_t jac_blending_1_2 = tmp_qloop_25*tmp_qloop_50; + const real_t tmp_qloop_60 = jac_blending_0_1*jac_blending_1_2; + const real_t jac_blending_2_0 = tmp_qloop_22*tmp_qloop_52; + const real_t jac_blending_2_1 = tmp_qloop_22*tmp_qloop_44; + const real_t tmp_qloop_61 = jac_blending_1_2*jac_blending_2_1; + const real_t jac_blending_2_2 = tmp_qloop_43*tmp_qloop_58; + const real_t tmp_qloop_59 = jac_blending_1_1*jac_blending_2_2; + const real_t tmp_qloop_62 = jac_blending_0_1*jac_blending_2_2; + const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; + const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); + const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_GREEN_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; + const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); + const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); + const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); + const real_t jac_blending_inv_1_0 = tmp_qloop_65*(-jac_blending_1_0*jac_blending_2_2 + jac_blending_1_2*jac_blending_2_0); + const real_t jac_blending_inv_1_1 = tmp_qloop_65*(jac_blending_0_0*jac_blending_2_2 - jac_blending_0_2*jac_blending_2_0); + const real_t jac_blending_inv_1_2 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_1_2 + jac_blending_0_2*jac_blending_1_0); + const real_t jac_blending_inv_2_0 = tmp_qloop_65*(jac_blending_1_0*jac_blending_2_1 - jac_blending_1_1*jac_blending_2_0); + const real_t tmp_qloop_126 = tmp_qloop_121*(jac_affine_inv_0_0_GREEN_DOWN*jac_blending_inv_0_0 + jac_affine_inv_0_1_GREEN_DOWN*jac_blending_inv_1_0 + jac_affine_inv_0_2_GREEN_DOWN*jac_blending_inv_2_0) + tmp_qloop_124*(jac_affine_inv_1_0_GREEN_DOWN*jac_blending_inv_0_0 + jac_affine_inv_1_1_GREEN_DOWN*jac_blending_inv_1_0 + jac_affine_inv_1_2_GREEN_DOWN*jac_blending_inv_2_0) + tmp_qloop_125*(jac_affine_inv_2_0_GREEN_DOWN*jac_blending_inv_0_0 + jac_affine_inv_2_1_GREEN_DOWN*jac_blending_inv_1_0 + jac_affine_inv_2_2_GREEN_DOWN*jac_blending_inv_2_0); + const real_t tmp_qloop_137 = tmp_qloop_126*tmp_qloop_136; + const real_t tmp_qloop_143 = tmp_qloop_126*tmp_qloop_142; + const real_t tmp_qloop_145 = tmp_qloop_126*tmp_qloop_144; + const real_t tmp_qloop_146 = tmp_qloop_112*tmp_qloop_145; + const real_t tmp_qloop_148 = tmp_qloop_126*tmp_qloop_147; + const real_t tmp_qloop_157 = tmp_qloop_126*tmp_qloop_135; + const real_t tmp_qloop_160 = tmp_qloop_107*tmp_qloop_157; + const real_t jac_blending_inv_2_1 = tmp_qloop_65*(-jac_blending_0_0*jac_blending_2_1 + jac_blending_0_1*jac_blending_2_0); + const real_t tmp_qloop_138 = tmp_qloop_121*(jac_affine_inv_0_0_GREEN_DOWN*jac_blending_inv_0_1 + jac_affine_inv_0_1_GREEN_DOWN*jac_blending_inv_1_1 + jac_affine_inv_0_2_GREEN_DOWN*jac_blending_inv_2_1) + tmp_qloop_124*(jac_affine_inv_1_0_GREEN_DOWN*jac_blending_inv_0_1 + jac_affine_inv_1_1_GREEN_DOWN*jac_blending_inv_1_1 + jac_affine_inv_1_2_GREEN_DOWN*jac_blending_inv_2_1) + tmp_qloop_125*(jac_affine_inv_2_0_GREEN_DOWN*jac_blending_inv_0_1 + jac_affine_inv_2_1_GREEN_DOWN*jac_blending_inv_1_1 + jac_affine_inv_2_2_GREEN_DOWN*jac_blending_inv_2_1); + const real_t tmp_qloop_139 = tmp_qloop_136*tmp_qloop_138; + const real_t tmp_qloop_149 = tmp_qloop_138*tmp_qloop_142; + const real_t tmp_qloop_150 = tmp_qloop_138*tmp_qloop_144; + const real_t tmp_qloop_151 = tmp_qloop_112*tmp_qloop_150; + const real_t tmp_qloop_152 = tmp_qloop_138*tmp_qloop_147; + const real_t tmp_qloop_162 = tmp_qloop_138*tmp_qloop_161; + const real_t jac_blending_inv_2_2 = tmp_qloop_65*(jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0); + const real_t tmp_qloop_140 = tmp_qloop_121*(jac_affine_inv_0_0_GREEN_DOWN*jac_blending_inv_0_2 + jac_affine_inv_0_1_GREEN_DOWN*jac_blending_inv_1_2 + jac_affine_inv_0_2_GREEN_DOWN*jac_blending_inv_2_2) + tmp_qloop_124*(jac_affine_inv_1_0_GREEN_DOWN*jac_blending_inv_0_2 + jac_affine_inv_1_1_GREEN_DOWN*jac_blending_inv_1_2 + jac_affine_inv_1_2_GREEN_DOWN*jac_blending_inv_2_2) + tmp_qloop_125*(jac_affine_inv_2_0_GREEN_DOWN*jac_blending_inv_0_2 + jac_affine_inv_2_1_GREEN_DOWN*jac_blending_inv_1_2 + jac_affine_inv_2_2_GREEN_DOWN*jac_blending_inv_2_2); + const real_t tmp_qloop_141 = tmp_qloop_136*tmp_qloop_140; + const real_t tmp_qloop_153 = tmp_qloop_140*tmp_qloop_142; + const real_t tmp_qloop_154 = tmp_qloop_140*tmp_qloop_144; + const real_t tmp_qloop_155 = tmp_qloop_112*tmp_qloop_154; + const real_t tmp_qloop_156 = tmp_qloop_140*tmp_qloop_147; + const real_t tmp_qloop_163 = tmp_qloop_140*tmp_qloop_161; + const real_t hessian_blending_0_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_68 - tmp_qloop_28*tmp_qloop_69 + tmp_qloop_31*(tmp_qloop_33 - tmp_qloop_68) + tmp_qloop_35*tmp_qloop_69)*1.0 - tmp_qloop_18*tmp_qloop_67; + const real_t hessian_blending_1_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_40 + tmp_qloop_25*tmp_qloop_71 + tmp_qloop_70)*1.0 - tmp_qloop_25*tmp_qloop_67; + const real_t hessian_blending_2_0_0 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_19*tmp_qloop_72 + tmp_qloop_22*tmp_qloop_71 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_67; + const real_t hessian_blending_0_0_1 = tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_18*tmp_qloop_78; + const real_t hessian_blending_1_0_1 = -tmp_qloop_29*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_80*tmp_qloop_81; + const real_t hessian_blending_2_0_1 = tmp_qloop_81*tmp_qloop_83 + tmp_qloop_84; + const real_t hessian_blending_0_0_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_75*1.0 - tmp_qloop_77*tmp_qloop_85; + const real_t hessian_blending_1_0_2 = tmp_qloop_80*tmp_qloop_86 + tmp_qloop_84; + const real_t hessian_blending_2_0_2 = -tmp_qloop_30*tmp_qloop_77 + tmp_qloop_52 + tmp_qloop_83*tmp_qloop_86; + const real_t hessian_blending_0_1_0 = -tmp_qloop_19*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_87*tmp_qloop_88; + const real_t hessian_blending_1_1_0 = tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_18*tmp_qloop_91; + const real_t hessian_blending_2_1_0 = tmp_qloop_88*tmp_qloop_92 - tmp_qloop_93; + const real_t hessian_blending_0_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_68 + tmp_qloop_54*tmp_qloop_69 + tmp_qloop_94)*1.0 - tmp_qloop_18*tmp_qloop_95; + const real_t hessian_blending_1_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_40 + tmp_qloop_31*(-tmp_qloop_40 + tmp_qloop_53) + tmp_qloop_47*tmp_qloop_79 + tmp_qloop_54*tmp_qloop_79)*1.0 - tmp_qloop_25*tmp_qloop_95; + const real_t hessian_blending_2_1_1 = tmp_qloop_17*tmp_qloop_37*(tmp_qloop_29*tmp_qloop_72 + tmp_qloop_54*tmp_qloop_82 + tmp_qloop_73)*1.0 - tmp_qloop_22*tmp_qloop_95; + const real_t hessian_blending_0_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_87*1.0 - tmp_qloop_93; + const real_t hessian_blending_1_1_2 = tmp_qloop_17*tmp_qloop_22*tmp_qloop_37*tmp_qloop_90*1.0 - tmp_qloop_22*tmp_qloop_91; + const real_t hessian_blending_2_1_2 = -tmp_qloop_30*tmp_qloop_89 + tmp_qloop_44 + tmp_qloop_86*tmp_qloop_92; + const real_t hessian_blending_0_2_0 = -tmp_qloop_19*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_88*tmp_qloop_96; + const real_t hessian_blending_1_2_0 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*tmp_qloop_98*1.0; + const real_t hessian_blending_2_2_0 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_18*tmp_qloop_37*1.0 - tmp_qloop_85*tmp_qloop_97; + const real_t hessian_blending_0_2_1 = -tmp_qloop_100 + tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*tmp_qloop_96*1.0; + const real_t hessian_blending_1_2_1 = -tmp_qloop_29*tmp_qloop_97 + tmp_qloop_50 + tmp_qloop_81*tmp_qloop_98; + const real_t hessian_blending_2_2_1 = tmp_qloop_101*tmp_qloop_17*tmp_qloop_25*tmp_qloop_37*1.0 - tmp_qloop_22*tmp_qloop_99; + const real_t hessian_blending_0_2_2 = -tmp_qloop_102*tmp_qloop_18 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_68 + tmp_qloop_57*tmp_qloop_69 + tmp_qloop_94)*1.0; + const real_t hessian_blending_1_2_2 = -tmp_qloop_102*tmp_qloop_25 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_40 + tmp_qloop_57*tmp_qloop_79 + tmp_qloop_70)*1.0; + const real_t hessian_blending_2_2_2 = -tmp_qloop_102*tmp_qloop_22 + tmp_qloop_17*tmp_qloop_37*(tmp_qloop_30*tmp_qloop_72 + tmp_qloop_31*(tmp_qloop_46 - tmp_qloop_72) + tmp_qloop_48*2.0 + tmp_qloop_57*tmp_qloop_82)*1.0; + const real_t q_tmp_0_0 = tmp_qloop_115*tmp_qloop_137; + const real_t q_tmp_0_1 = tmp_qloop_127*tmp_qloop_137; + const real_t q_tmp_0_2 = tmp_qloop_128*tmp_qloop_137; + const real_t q_tmp_0_3 = tmp_qloop_129*tmp_qloop_137; + const real_t q_tmp_0_4 = tmp_qloop_110*tmp_qloop_137; + const real_t q_tmp_0_5 = tmp_qloop_113*tmp_qloop_137; + const real_t q_tmp_0_6 = tmp_qloop_112*tmp_qloop_137; + const real_t q_tmp_0_7 = tmp_qloop_130*tmp_qloop_137; + const real_t q_tmp_0_8 = tmp_qloop_132*tmp_qloop_137; + const real_t q_tmp_0_9 = tmp_qloop_134*tmp_qloop_137; + const real_t q_tmp_0_10 = tmp_qloop_115*tmp_qloop_139; + const real_t q_tmp_0_11 = tmp_qloop_127*tmp_qloop_139; + const real_t q_tmp_0_12 = tmp_qloop_128*tmp_qloop_139; + const real_t q_tmp_0_13 = tmp_qloop_129*tmp_qloop_139; + const real_t q_tmp_0_14 = tmp_qloop_110*tmp_qloop_139; + const real_t q_tmp_0_15 = tmp_qloop_113*tmp_qloop_139; + const real_t q_tmp_0_16 = tmp_qloop_112*tmp_qloop_139; + const real_t q_tmp_0_17 = tmp_qloop_130*tmp_qloop_139; + const real_t q_tmp_0_18 = tmp_qloop_132*tmp_qloop_139; + const real_t q_tmp_0_19 = tmp_qloop_134*tmp_qloop_139; + const real_t q_tmp_0_20 = tmp_qloop_115*tmp_qloop_141; + const real_t q_tmp_0_21 = tmp_qloop_127*tmp_qloop_141; + const real_t q_tmp_0_22 = tmp_qloop_128*tmp_qloop_141; + const real_t q_tmp_0_23 = tmp_qloop_129*tmp_qloop_141; + const real_t q_tmp_0_24 = tmp_qloop_110*tmp_qloop_141; + const real_t q_tmp_0_25 = tmp_qloop_113*tmp_qloop_141; + const real_t q_tmp_0_26 = tmp_qloop_112*tmp_qloop_141; + const real_t q_tmp_0_27 = tmp_qloop_130*tmp_qloop_141; + const real_t q_tmp_0_28 = tmp_qloop_132*tmp_qloop_141; + const real_t q_tmp_0_29 = tmp_qloop_134*tmp_qloop_141; + const real_t q_tmp_1_0 = tmp_qloop_115*tmp_qloop_143; + const real_t q_tmp_1_1 = tmp_qloop_127*tmp_qloop_143; + const real_t q_tmp_1_2 = tmp_qloop_128*tmp_qloop_143; + const real_t q_tmp_1_3 = tmp_qloop_129*tmp_qloop_143; + const real_t q_tmp_1_4 = tmp_qloop_146; + const real_t q_tmp_1_5 = tmp_qloop_133*tmp_qloop_145; + const real_t q_tmp_1_6 = tmp_qloop_133*tmp_qloop_148; + const real_t q_tmp_1_7 = tmp_qloop_130*tmp_qloop_143; + const real_t q_tmp_1_8 = tmp_qloop_132*tmp_qloop_143; + const real_t q_tmp_1_9 = tmp_qloop_134*tmp_qloop_143; + const real_t q_tmp_1_10 = tmp_qloop_115*tmp_qloop_149; + const real_t q_tmp_1_11 = tmp_qloop_127*tmp_qloop_149; + const real_t q_tmp_1_12 = tmp_qloop_128*tmp_qloop_149; + const real_t q_tmp_1_13 = tmp_qloop_129*tmp_qloop_149; + const real_t q_tmp_1_14 = tmp_qloop_151; + const real_t q_tmp_1_15 = tmp_qloop_133*tmp_qloop_150; + const real_t q_tmp_1_16 = tmp_qloop_133*tmp_qloop_152; + const real_t q_tmp_1_17 = tmp_qloop_130*tmp_qloop_149; + const real_t q_tmp_1_18 = tmp_qloop_132*tmp_qloop_149; + const real_t q_tmp_1_19 = tmp_qloop_134*tmp_qloop_149; + const real_t q_tmp_1_20 = tmp_qloop_115*tmp_qloop_153; + const real_t q_tmp_1_21 = tmp_qloop_127*tmp_qloop_153; + const real_t q_tmp_1_22 = tmp_qloop_128*tmp_qloop_153; + const real_t q_tmp_1_23 = tmp_qloop_129*tmp_qloop_153; + const real_t q_tmp_1_24 = tmp_qloop_155; + const real_t q_tmp_1_25 = tmp_qloop_133*tmp_qloop_154; + const real_t q_tmp_1_26 = tmp_qloop_133*tmp_qloop_156; + const real_t q_tmp_1_27 = tmp_qloop_130*tmp_qloop_153; + const real_t q_tmp_1_28 = tmp_qloop_132*tmp_qloop_153; + const real_t q_tmp_1_29 = tmp_qloop_134*tmp_qloop_153; + const real_t q_tmp_2_0 = tmp_qloop_115*tmp_qloop_148; + const real_t q_tmp_2_1 = tmp_qloop_127*tmp_qloop_148; + const real_t q_tmp_2_2 = tmp_qloop_128*tmp_qloop_148; + const real_t q_tmp_2_3 = tmp_qloop_129*tmp_qloop_148; + const real_t q_tmp_2_4 = tmp_qloop_131*tmp_qloop_145; + const real_t q_tmp_2_5 = tmp_qloop_146; + const real_t q_tmp_2_6 = tmp_qloop_157*tmp_qloop_158; + const real_t q_tmp_2_7 = tmp_qloop_130*tmp_qloop_148; + const real_t q_tmp_2_8 = tmp_qloop_132*tmp_qloop_148; + const real_t q_tmp_2_9 = tmp_qloop_134*tmp_qloop_148; + const real_t q_tmp_2_10 = tmp_qloop_115*tmp_qloop_152; + const real_t q_tmp_2_11 = tmp_qloop_127*tmp_qloop_152; + const real_t q_tmp_2_12 = tmp_qloop_128*tmp_qloop_152; + const real_t q_tmp_2_13 = tmp_qloop_129*tmp_qloop_152; + const real_t q_tmp_2_14 = tmp_qloop_131*tmp_qloop_150; + const real_t q_tmp_2_15 = tmp_qloop_151; + const real_t q_tmp_2_16 = tmp_qloop_138*tmp_qloop_159; + const real_t q_tmp_2_17 = tmp_qloop_130*tmp_qloop_152; + const real_t q_tmp_2_18 = tmp_qloop_132*tmp_qloop_152; + const real_t q_tmp_2_19 = tmp_qloop_134*tmp_qloop_152; + const real_t q_tmp_2_20 = tmp_qloop_115*tmp_qloop_156; + const real_t q_tmp_2_21 = tmp_qloop_127*tmp_qloop_156; + const real_t q_tmp_2_22 = tmp_qloop_128*tmp_qloop_156; + const real_t q_tmp_2_23 = tmp_qloop_129*tmp_qloop_156; + const real_t q_tmp_2_24 = tmp_qloop_131*tmp_qloop_154; + const real_t q_tmp_2_25 = tmp_qloop_155; + const real_t q_tmp_2_26 = tmp_qloop_140*tmp_qloop_159; + const real_t q_tmp_2_27 = tmp_qloop_130*tmp_qloop_156; + const real_t q_tmp_2_28 = tmp_qloop_132*tmp_qloop_156; + const real_t q_tmp_2_29 = tmp_qloop_134*tmp_qloop_156; + const real_t q_tmp_3_0 = tmp_qloop_115*tmp_qloop_145; + const real_t q_tmp_3_1 = tmp_qloop_127*tmp_qloop_145; + const real_t q_tmp_3_2 = tmp_qloop_128*tmp_qloop_145; + const real_t q_tmp_3_3 = tmp_qloop_129*tmp_qloop_145; + const real_t q_tmp_3_4 = tmp_qloop_109*tmp_qloop_160; + const real_t q_tmp_3_5 = tmp_qloop_111*tmp_qloop_160; + const real_t q_tmp_3_6 = tmp_qloop_146; + const real_t q_tmp_3_7 = tmp_qloop_130*tmp_qloop_145; + const real_t q_tmp_3_8 = tmp_qloop_132*tmp_qloop_145; + const real_t q_tmp_3_9 = tmp_qloop_134*tmp_qloop_145; + const real_t q_tmp_3_10 = tmp_qloop_115*tmp_qloop_150; + const real_t q_tmp_3_11 = tmp_qloop_127*tmp_qloop_150; + const real_t q_tmp_3_12 = tmp_qloop_128*tmp_qloop_150; + const real_t q_tmp_3_13 = tmp_qloop_129*tmp_qloop_150; + const real_t q_tmp_3_14 = tmp_qloop_109*tmp_qloop_162; + const real_t q_tmp_3_15 = tmp_qloop_111*tmp_qloop_162; + const real_t q_tmp_3_16 = tmp_qloop_151; + const real_t q_tmp_3_17 = tmp_qloop_130*tmp_qloop_150; + const real_t q_tmp_3_18 = tmp_qloop_132*tmp_qloop_150; + const real_t q_tmp_3_19 = tmp_qloop_134*tmp_qloop_150; + const real_t q_tmp_3_20 = tmp_qloop_115*tmp_qloop_154; + const real_t q_tmp_3_21 = tmp_qloop_127*tmp_qloop_154; + const real_t q_tmp_3_22 = tmp_qloop_128*tmp_qloop_154; + const real_t q_tmp_3_23 = tmp_qloop_129*tmp_qloop_154; + const real_t q_tmp_3_24 = tmp_qloop_109*tmp_qloop_163; + const real_t q_tmp_3_25 = tmp_qloop_111*tmp_qloop_163; + const real_t q_tmp_3_26 = tmp_qloop_155; + const real_t q_tmp_3_27 = tmp_qloop_130*tmp_qloop_154; + const real_t q_tmp_3_28 = tmp_qloop_132*tmp_qloop_154; + const real_t q_tmp_3_29 = tmp_qloop_134*tmp_qloop_154; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_0_12 = q_acc_0_12 + q_tmp_0_12; + q_acc_0_13 = q_acc_0_13 + q_tmp_0_13; + q_acc_0_14 = q_acc_0_14 + q_tmp_0_14; + q_acc_0_15 = q_acc_0_15 + q_tmp_0_15; + q_acc_0_16 = q_acc_0_16 + q_tmp_0_16; + q_acc_0_17 = q_acc_0_17 + q_tmp_0_17; + q_acc_0_18 = q_acc_0_18 + q_tmp_0_18; + q_acc_0_19 = q_acc_0_19 + q_tmp_0_19; + q_acc_0_20 = q_acc_0_20 + q_tmp_0_20; + q_acc_0_21 = q_acc_0_21 + q_tmp_0_21; + q_acc_0_22 = q_acc_0_22 + q_tmp_0_22; + q_acc_0_23 = q_acc_0_23 + q_tmp_0_23; + q_acc_0_24 = q_acc_0_24 + q_tmp_0_24; + q_acc_0_25 = q_acc_0_25 + q_tmp_0_25; + q_acc_0_26 = q_acc_0_26 + q_tmp_0_26; + q_acc_0_27 = q_acc_0_27 + q_tmp_0_27; + q_acc_0_28 = q_acc_0_28 + q_tmp_0_28; + q_acc_0_29 = q_acc_0_29 + q_tmp_0_29; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_1_12 = q_acc_1_12 + q_tmp_1_12; + q_acc_1_13 = q_acc_1_13 + q_tmp_1_13; + q_acc_1_14 = q_acc_1_14 + q_tmp_1_14; + q_acc_1_15 = q_acc_1_15 + q_tmp_1_15; + q_acc_1_16 = q_acc_1_16 + q_tmp_1_16; + q_acc_1_17 = q_acc_1_17 + q_tmp_1_17; + q_acc_1_18 = q_acc_1_18 + q_tmp_1_18; + q_acc_1_19 = q_acc_1_19 + q_tmp_1_19; + q_acc_1_20 = q_acc_1_20 + q_tmp_1_20; + q_acc_1_21 = q_acc_1_21 + q_tmp_1_21; + q_acc_1_22 = q_acc_1_22 + q_tmp_1_22; + q_acc_1_23 = q_acc_1_23 + q_tmp_1_23; + q_acc_1_24 = q_acc_1_24 + q_tmp_1_24; + q_acc_1_25 = q_acc_1_25 + q_tmp_1_25; + q_acc_1_26 = q_acc_1_26 + q_tmp_1_26; + q_acc_1_27 = q_acc_1_27 + q_tmp_1_27; + q_acc_1_28 = q_acc_1_28 + q_tmp_1_28; + q_acc_1_29 = q_acc_1_29 + q_tmp_1_29; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + q_acc_2_12 = q_acc_2_12 + q_tmp_2_12; + q_acc_2_13 = q_acc_2_13 + q_tmp_2_13; + q_acc_2_14 = q_acc_2_14 + q_tmp_2_14; + q_acc_2_15 = q_acc_2_15 + q_tmp_2_15; + q_acc_2_16 = q_acc_2_16 + q_tmp_2_16; + q_acc_2_17 = q_acc_2_17 + q_tmp_2_17; + q_acc_2_18 = q_acc_2_18 + q_tmp_2_18; + q_acc_2_19 = q_acc_2_19 + q_tmp_2_19; + q_acc_2_20 = q_acc_2_20 + q_tmp_2_20; + q_acc_2_21 = q_acc_2_21 + q_tmp_2_21; + q_acc_2_22 = q_acc_2_22 + q_tmp_2_22; + q_acc_2_23 = q_acc_2_23 + q_tmp_2_23; + q_acc_2_24 = q_acc_2_24 + q_tmp_2_24; + q_acc_2_25 = q_acc_2_25 + q_tmp_2_25; + q_acc_2_26 = q_acc_2_26 + q_tmp_2_26; + q_acc_2_27 = q_acc_2_27 + q_tmp_2_27; + q_acc_2_28 = q_acc_2_28 + q_tmp_2_28; + q_acc_2_29 = q_acc_2_29 + q_tmp_2_29; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_3_6 = q_acc_3_6 + q_tmp_3_6; + q_acc_3_7 = q_acc_3_7 + q_tmp_3_7; + q_acc_3_8 = q_acc_3_8 + q_tmp_3_8; + q_acc_3_9 = q_acc_3_9 + q_tmp_3_9; + q_acc_3_10 = q_acc_3_10 + q_tmp_3_10; + q_acc_3_11 = q_acc_3_11 + q_tmp_3_11; + q_acc_3_12 = q_acc_3_12 + q_tmp_3_12; + q_acc_3_13 = q_acc_3_13 + q_tmp_3_13; + q_acc_3_14 = q_acc_3_14 + q_tmp_3_14; + q_acc_3_15 = q_acc_3_15 + q_tmp_3_15; + q_acc_3_16 = q_acc_3_16 + q_tmp_3_16; + q_acc_3_17 = q_acc_3_17 + q_tmp_3_17; + q_acc_3_18 = q_acc_3_18 + q_tmp_3_18; + q_acc_3_19 = q_acc_3_19 + q_tmp_3_19; + q_acc_3_20 = q_acc_3_20 + q_tmp_3_20; + q_acc_3_21 = q_acc_3_21 + q_tmp_3_21; + q_acc_3_22 = q_acc_3_22 + q_tmp_3_22; + q_acc_3_23 = q_acc_3_23 + q_tmp_3_23; + q_acc_3_24 = q_acc_3_24 + q_tmp_3_24; + q_acc_3_25 = q_acc_3_25 + q_tmp_3_25; + q_acc_3_26 = q_acc_3_26 + q_tmp_3_26; + q_acc_3_27 = q_acc_3_27 + q_tmp_3_27; + q_acc_3_28 = q_acc_3_28 + q_tmp_3_28; + q_acc_3_29 = q_acc_3_29 + q_tmp_3_29; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_0 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_2 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + } + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp b/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp new file mode 100644 index 00000000..adea9287 --- /dev/null +++ b/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp @@ -0,0 +1,874 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2VectorToP1ElementwiseGradRhoByRhoDotU.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, real_t * RESTRICT _data_src_edge_0, real_t * RESTRICT _data_src_edge_1, real_t * RESTRICT _data_src_vertex_0, real_t * RESTRICT _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_3),tmp_qloop_5); + const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_8 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_0),tmp_qloop_7)); + const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_0)),_mm256_mul_pd(rho_dof_5,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_3,tmp_qloop_7)),_mm256_mul_pd(_mm256_mul_pd(rho_dof_4,tmp_qloop_7),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8); + const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_7)),_mm256_mul_pd(rho_dof_4,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_3,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(rho_dof_5,tmp_qloop_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8); + const __m256d tmp_qloop_11 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY))); + const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_3); + const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_5); + const __m256d tmp_qloop_14 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_7); + const __m256d tmp_qloop_15 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_0); + const __m256d tmp_qloop_17 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_6),_mm256_mul_pd(rho_dof_1,tmp_qloop_12)),_mm256_mul_pd(rho_dof_2,tmp_qloop_13)),_mm256_mul_pd(rho_dof_3,tmp_qloop_1)),_mm256_mul_pd(rho_dof_4,tmp_qloop_14)),_mm256_mul_pd(rho_dof_5,tmp_qloop_16))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)); + const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_18); + const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))); + const __m256d tmp_qloop_21 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_20); + const __m256d tmp_qloop_22 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_22); + const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_24); + const __m256d tmp_qloop_26 = _mm256_mul_pd(tmp_qloop_20,tmp_qloop_22); + const __m256d tmp_qloop_27 = _mm256_mul_pd(tmp_qloop_20,tmp_qloop_24); + const __m256d tmp_qloop_28 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_17),tmp_qloop_4); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_6); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_19); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_19); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_1,tmp_qloop_19); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_19); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_19); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_6); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_21); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_21); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_1,tmp_qloop_21); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_21); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_21); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_6); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_23); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_23); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_25); + const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_23); + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_23); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_6); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_26); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_26); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_27); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_26); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_26); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_6); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_25); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_25); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_28); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_25); + const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_25); + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_6); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_27); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_27); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_20,tmp_qloop_28); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_27); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_27); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_3 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t src_dof_6 = _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t src_dof_7 = _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_8 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_9 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_10 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_11 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t rho_dof_3 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_1 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_3 = tmp_qloop_2*2.0; + const real_t tmp_qloop_4 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_1 + tmp_qloop_3 + tmp_qloop_5 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_7 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_8 = rho_dof_0*(tmp_qloop_0 + tmp_qloop_7 - 3.0); + const real_t tmp_qloop_9 = rho_dof_1*(tmp_qloop_0 - 1.0) + rho_dof_3*tmp_qloop_7 - rho_dof_4*tmp_qloop_7 + rho_dof_5*(-tmp_qloop_7 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_8; + const real_t tmp_qloop_10 = rho_dof_2*(tmp_qloop_7 - 1.0) + rho_dof_3*tmp_qloop_0 + rho_dof_4*(-tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0) - rho_dof_5*tmp_qloop_0 + tmp_qloop_8; + const real_t tmp_qloop_11 = jac_affine_inv_0_0_GRAY*tmp_qloop_9 + jac_affine_inv_1_0_GRAY*tmp_qloop_10; + const real_t tmp_qloop_12 = tmp_qloop_3 - _data_q_p_0[q]; + const real_t tmp_qloop_13 = tmp_qloop_5 - _data_q_p_1[q]; + const real_t tmp_qloop_14 = -tmp_qloop_1 + tmp_qloop_4*-4.0 + tmp_qloop_7; + const real_t tmp_qloop_15 = tmp_qloop_2*4.0; + const real_t tmp_qloop_16 = tmp_qloop_0 - tmp_qloop_1 - tmp_qloop_15; + const real_t tmp_qloop_17 = abs_det_jac_affine_GRAY*1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q]; + const real_t tmp_qloop_18 = tmp_qloop_17*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); + const real_t tmp_qloop_19 = tmp_qloop_11*tmp_qloop_18; + const real_t tmp_qloop_20 = jac_affine_inv_0_1_GRAY*tmp_qloop_9 + jac_affine_inv_1_1_GRAY*tmp_qloop_10; + const real_t tmp_qloop_21 = tmp_qloop_18*tmp_qloop_20; + const real_t tmp_qloop_22 = tmp_qloop_17*_data_q_p_0[q]; + const real_t tmp_qloop_23 = tmp_qloop_11*tmp_qloop_22; + const real_t tmp_qloop_24 = tmp_qloop_17*_data_q_p_1[q]; + const real_t tmp_qloop_25 = tmp_qloop_11*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_20*tmp_qloop_22; + const real_t tmp_qloop_27 = tmp_qloop_20*tmp_qloop_24; + const real_t tmp_qloop_28 = tmp_qloop_0*tmp_qloop_17*tmp_qloop_4; + const real_t q_tmp_0_0 = tmp_qloop_19*tmp_qloop_6; + const real_t q_tmp_0_1 = tmp_qloop_12*tmp_qloop_19; + const real_t q_tmp_0_2 = tmp_qloop_13*tmp_qloop_19; + const real_t q_tmp_0_3 = tmp_qloop_1*tmp_qloop_19; + const real_t q_tmp_0_4 = tmp_qloop_14*tmp_qloop_19; + const real_t q_tmp_0_5 = tmp_qloop_16*tmp_qloop_19; + const real_t q_tmp_0_6 = tmp_qloop_21*tmp_qloop_6; + const real_t q_tmp_0_7 = tmp_qloop_12*tmp_qloop_21; + const real_t q_tmp_0_8 = tmp_qloop_13*tmp_qloop_21; + const real_t q_tmp_0_9 = tmp_qloop_1*tmp_qloop_21; + const real_t q_tmp_0_10 = tmp_qloop_14*tmp_qloop_21; + const real_t q_tmp_0_11 = tmp_qloop_16*tmp_qloop_21; + const real_t q_tmp_1_0 = tmp_qloop_23*tmp_qloop_6; + const real_t q_tmp_1_1 = tmp_qloop_12*tmp_qloop_23; + const real_t q_tmp_1_2 = tmp_qloop_13*tmp_qloop_23; + const real_t q_tmp_1_3 = tmp_qloop_15*tmp_qloop_25; + const real_t q_tmp_1_4 = tmp_qloop_14*tmp_qloop_23; + const real_t q_tmp_1_5 = tmp_qloop_16*tmp_qloop_23; + const real_t q_tmp_1_6 = tmp_qloop_26*tmp_qloop_6; + const real_t q_tmp_1_7 = tmp_qloop_12*tmp_qloop_26; + const real_t q_tmp_1_8 = tmp_qloop_13*tmp_qloop_26; + const real_t q_tmp_1_9 = tmp_qloop_15*tmp_qloop_27; + const real_t q_tmp_1_10 = tmp_qloop_14*tmp_qloop_26; + const real_t q_tmp_1_11 = tmp_qloop_16*tmp_qloop_26; + const real_t q_tmp_2_0 = tmp_qloop_25*tmp_qloop_6; + const real_t q_tmp_2_1 = tmp_qloop_12*tmp_qloop_25; + const real_t q_tmp_2_2 = tmp_qloop_13*tmp_qloop_25; + const real_t q_tmp_2_3 = tmp_qloop_11*tmp_qloop_28; + const real_t q_tmp_2_4 = tmp_qloop_14*tmp_qloop_25; + const real_t q_tmp_2_5 = tmp_qloop_16*tmp_qloop_25; + const real_t q_tmp_2_6 = tmp_qloop_27*tmp_qloop_6; + const real_t q_tmp_2_7 = tmp_qloop_12*tmp_qloop_27; + const real_t q_tmp_2_8 = tmp_qloop_13*tmp_qloop_27; + const real_t q_tmp_2_9 = tmp_qloop_20*tmp_qloop_28; + const real_t q_tmp_2_10 = tmp_qloop_14*tmp_qloop_27; + const real_t q_tmp_2_11 = tmp_qloop_16*tmp_qloop_27; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + } + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_3),tmp_qloop_5); + const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_8 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_0),tmp_qloop_7)); + const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_0)),_mm256_mul_pd(rho_dof_5,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_3,tmp_qloop_7)),_mm256_mul_pd(_mm256_mul_pd(rho_dof_4,tmp_qloop_7),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8); + const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_7)),_mm256_mul_pd(rho_dof_4,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_3,tmp_qloop_0)),_mm256_mul_pd(_mm256_mul_pd(rho_dof_5,tmp_qloop_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8); + const __m256d tmp_qloop_11 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE))); + const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_3); + const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_5); + const __m256d tmp_qloop_14 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_7); + const __m256d tmp_qloop_15 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_0); + const __m256d tmp_qloop_17 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_6),_mm256_mul_pd(rho_dof_1,tmp_qloop_12)),_mm256_mul_pd(rho_dof_2,tmp_qloop_13)),_mm256_mul_pd(rho_dof_3,tmp_qloop_1)),_mm256_mul_pd(rho_dof_4,tmp_qloop_14)),_mm256_mul_pd(rho_dof_5,tmp_qloop_16))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)); + const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_18); + const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))); + const __m256d tmp_qloop_21 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_20); + const __m256d tmp_qloop_22 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_22); + const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_24); + const __m256d tmp_qloop_26 = _mm256_mul_pd(tmp_qloop_20,tmp_qloop_22); + const __m256d tmp_qloop_27 = _mm256_mul_pd(tmp_qloop_20,tmp_qloop_24); + const __m256d tmp_qloop_28 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_17),tmp_qloop_4); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_6); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_19); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_19); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_1,tmp_qloop_19); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_19); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_19); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_6); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_21); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_21); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_1,tmp_qloop_21); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_21); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_21); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_6); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_23); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_23); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_25); + const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_23); + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_23); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_6); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_26); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_26); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_27); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_26); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_26); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_6); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_25); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_25); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_28); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_25); + const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_25); + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_6); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_27); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_13,tmp_qloop_27); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_20,tmp_qloop_28); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_14,tmp_qloop_27); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_27); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t src_dof_3 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_6 = _data_src_vertex_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_7 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_8 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t src_dof_9 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_10 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t src_dof_11 = _data_src_edge_1[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t rho_dof_3 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_1 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_3 = tmp_qloop_2*2.0; + const real_t tmp_qloop_4 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_1 + tmp_qloop_3 + tmp_qloop_5 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_7 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_8 = rho_dof_0*(tmp_qloop_0 + tmp_qloop_7 - 3.0); + const real_t tmp_qloop_9 = rho_dof_1*(tmp_qloop_0 - 1.0) + rho_dof_3*tmp_qloop_7 - rho_dof_4*tmp_qloop_7 + rho_dof_5*(-tmp_qloop_7 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_8; + const real_t tmp_qloop_10 = rho_dof_2*(tmp_qloop_7 - 1.0) + rho_dof_3*tmp_qloop_0 + rho_dof_4*(-tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0) - rho_dof_5*tmp_qloop_0 + tmp_qloop_8; + const real_t tmp_qloop_11 = jac_affine_inv_0_0_BLUE*tmp_qloop_9 + jac_affine_inv_1_0_BLUE*tmp_qloop_10; + const real_t tmp_qloop_12 = tmp_qloop_3 - _data_q_p_0[q]; + const real_t tmp_qloop_13 = tmp_qloop_5 - _data_q_p_1[q]; + const real_t tmp_qloop_14 = -tmp_qloop_1 + tmp_qloop_4*-4.0 + tmp_qloop_7; + const real_t tmp_qloop_15 = tmp_qloop_2*4.0; + const real_t tmp_qloop_16 = tmp_qloop_0 - tmp_qloop_1 - tmp_qloop_15; + const real_t tmp_qloop_17 = abs_det_jac_affine_BLUE*1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q]; + const real_t tmp_qloop_18 = tmp_qloop_17*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); + const real_t tmp_qloop_19 = tmp_qloop_11*tmp_qloop_18; + const real_t tmp_qloop_20 = jac_affine_inv_0_1_BLUE*tmp_qloop_9 + jac_affine_inv_1_1_BLUE*tmp_qloop_10; + const real_t tmp_qloop_21 = tmp_qloop_18*tmp_qloop_20; + const real_t tmp_qloop_22 = tmp_qloop_17*_data_q_p_0[q]; + const real_t tmp_qloop_23 = tmp_qloop_11*tmp_qloop_22; + const real_t tmp_qloop_24 = tmp_qloop_17*_data_q_p_1[q]; + const real_t tmp_qloop_25 = tmp_qloop_11*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_20*tmp_qloop_22; + const real_t tmp_qloop_27 = tmp_qloop_20*tmp_qloop_24; + const real_t tmp_qloop_28 = tmp_qloop_0*tmp_qloop_17*tmp_qloop_4; + const real_t q_tmp_0_0 = tmp_qloop_19*tmp_qloop_6; + const real_t q_tmp_0_1 = tmp_qloop_12*tmp_qloop_19; + const real_t q_tmp_0_2 = tmp_qloop_13*tmp_qloop_19; + const real_t q_tmp_0_3 = tmp_qloop_1*tmp_qloop_19; + const real_t q_tmp_0_4 = tmp_qloop_14*tmp_qloop_19; + const real_t q_tmp_0_5 = tmp_qloop_16*tmp_qloop_19; + const real_t q_tmp_0_6 = tmp_qloop_21*tmp_qloop_6; + const real_t q_tmp_0_7 = tmp_qloop_12*tmp_qloop_21; + const real_t q_tmp_0_8 = tmp_qloop_13*tmp_qloop_21; + const real_t q_tmp_0_9 = tmp_qloop_1*tmp_qloop_21; + const real_t q_tmp_0_10 = tmp_qloop_14*tmp_qloop_21; + const real_t q_tmp_0_11 = tmp_qloop_16*tmp_qloop_21; + const real_t q_tmp_1_0 = tmp_qloop_23*tmp_qloop_6; + const real_t q_tmp_1_1 = tmp_qloop_12*tmp_qloop_23; + const real_t q_tmp_1_2 = tmp_qloop_13*tmp_qloop_23; + const real_t q_tmp_1_3 = tmp_qloop_15*tmp_qloop_25; + const real_t q_tmp_1_4 = tmp_qloop_14*tmp_qloop_23; + const real_t q_tmp_1_5 = tmp_qloop_16*tmp_qloop_23; + const real_t q_tmp_1_6 = tmp_qloop_26*tmp_qloop_6; + const real_t q_tmp_1_7 = tmp_qloop_12*tmp_qloop_26; + const real_t q_tmp_1_8 = tmp_qloop_13*tmp_qloop_26; + const real_t q_tmp_1_9 = tmp_qloop_15*tmp_qloop_27; + const real_t q_tmp_1_10 = tmp_qloop_14*tmp_qloop_26; + const real_t q_tmp_1_11 = tmp_qloop_16*tmp_qloop_26; + const real_t q_tmp_2_0 = tmp_qloop_25*tmp_qloop_6; + const real_t q_tmp_2_1 = tmp_qloop_12*tmp_qloop_25; + const real_t q_tmp_2_2 = tmp_qloop_13*tmp_qloop_25; + const real_t q_tmp_2_3 = tmp_qloop_11*tmp_qloop_28; + const real_t q_tmp_2_4 = tmp_qloop_14*tmp_qloop_25; + const real_t q_tmp_2_5 = tmp_qloop_16*tmp_qloop_25; + const real_t q_tmp_2_6 = tmp_qloop_27*tmp_qloop_6; + const real_t q_tmp_2_7 = tmp_qloop_12*tmp_qloop_27; + const real_t q_tmp_2_8 = tmp_qloop_13*tmp_qloop_27; + const real_t q_tmp_2_9 = tmp_qloop_20*tmp_qloop_28; + const real_t q_tmp_2_10 = tmp_qloop_14*tmp_qloop_27; + const real_t q_tmp_2_11 = tmp_qloop_16*tmp_qloop_27; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + } + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp b/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp new file mode 100644 index 00000000..d4b9475d --- /dev/null +++ b/operators/grad_rho_by_rho_dot_u/avx/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp @@ -0,0 +1,6458 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2VectorToP1ElementwiseGradRhoByRhoDotU.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, real_t * RESTRICT _data_src_edge_0, real_t * RESTRICT _data_src_edge_1, real_t * RESTRICT _data_src_edge_2, real_t * RESTRICT _data_src_vertex_0, real_t * RESTRICT _data_src_vertex_1, real_t * RESTRICT _data_src_vertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +{ + { + const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; + + const real_t _data_q_p_0 [] = {0.25, 0.16666666666666666, 0.16666666666666666, 0.5, 0.16666666666666666}; + + const real_t _data_q_p_1 [] = {0.25, 0.16666666666666666, 0.5, 0.16666666666666666, 0.16666666666666666}; + + const real_t _data_q_p_2 [] = {0.25, 0.5, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; + + const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2; + const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP; + const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP; + const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP; + const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP; + const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP; + const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP; + const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP; + const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP; + const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP; + const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP; + const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP; + const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP; + const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP; + const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP; + const real_t tmp_coords_jac_6_WHITE_UP = jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP; + const real_t tmp_coords_jac_7_WHITE_UP = 1.0 / (tmp_coords_jac_6_WHITE_UP); + const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP); + const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP); + const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP); + const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP); + const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP); + const real_t abs_det_jac_affine_WHITE_UP = abs(tmp_coords_jac_6_WHITE_UP); + { + /* CellType.WHITE_UP */ + for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) + for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 5; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_6 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_7 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_8 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_11 = _mm256_add_pd(tmp_qloop_10,tmp_qloop_9); + const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_11),tmp_qloop_3),tmp_qloop_5),tmp_qloop_7); + const __m256d tmp_qloop_13 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_14 = _mm256_add_pd(tmp_qloop_13,tmp_qloop_6); + const __m256d tmp_qloop_15 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_6),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_16 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_14),tmp_qloop_8)); + const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_13),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_8)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_6)),tmp_qloop_15),tmp_qloop_17); + const __m256d tmp_qloop_19 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_8); + const __m256d tmp_qloop_20 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_6)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_8)),tmp_qloop_17),tmp_qloop_20); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_13)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_6)),_mm256_mul_pd(rho_dof_5,tmp_qloop_8)),tmp_qloop_15),tmp_qloop_16),tmp_qloop_20); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP,jac_affine_inv_0_0_WHITE_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP,jac_affine_inv_1_0_WHITE_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP,jac_affine_inv_2_0_WHITE_UP))); + const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_1); + const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_3); + const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_5); + const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_13); + const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_29 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_6); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_11,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8); + const __m256d tmp_qloop_32 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_12),_mm256_mul_pd(rho_dof_1,tmp_qloop_24)),_mm256_mul_pd(rho_dof_2,tmp_qloop_25)),_mm256_mul_pd(rho_dof_3,tmp_qloop_26)),_mm256_mul_pd(rho_dof_4,tmp_qloop_7)),_mm256_mul_pd(rho_dof_5,tmp_qloop_10)),_mm256_mul_pd(rho_dof_6,tmp_qloop_9)),_mm256_mul_pd(rho_dof_7,tmp_qloop_27)),_mm256_mul_pd(rho_dof_8,tmp_qloop_29)),_mm256_mul_pd(rho_dof_9,tmp_qloop_31))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_WHITE_UP,abs_det_jac_affine_WHITE_UP,abs_det_jac_affine_WHITE_UP,abs_det_jac_affine_WHITE_UP)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_33); + const __m256d tmp_qloop_35 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP,jac_affine_inv_0_1_WHITE_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP,jac_affine_inv_1_1_WHITE_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP,jac_affine_inv_2_1_WHITE_UP))); + const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_35); + const __m256d tmp_qloop_37 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP,jac_affine_inv_0_2_WHITE_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP,jac_affine_inv_1_2_WHITE_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP,jac_affine_inv_2_2_WHITE_UP))); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_37); + const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_39); + const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_41); + const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_9); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_44); + const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_39); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_41); + const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_47,tmp_qloop_9); + const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_44); + const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_39); + const __m256d tmp_qloop_51 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_41); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_9); + const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_44); + const __m256d tmp_qloop_54 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_32); + const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_8); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_55); + const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_54); + const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_4); + const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_58); + const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_58); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_34); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_34); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_34); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_34); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_7); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_34); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_9); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_34); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_34); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_34); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_36); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_36); + const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_36); + const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_36); + const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_7); + const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_36); + const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_9); + const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_36); + const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_36); + const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_36); + const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_38); + const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_38); + const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_38); + const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_38); + const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_7); + const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_38); + const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_9); + const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_38); + const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_38); + const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_38); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_40); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_40); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_40); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_40); + const __m256d q_tmp_1_4 = tmp_qloop_43; + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_42); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_45); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_40); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_40); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_40); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_46); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_46); + const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_46); + const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_46); + const __m256d q_tmp_1_14 = tmp_qloop_48; + const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_47); + const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_49); + const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_46); + const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_46); + const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_46); + const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_50); + const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_50); + const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50); + const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_50); + const __m256d q_tmp_1_24 = tmp_qloop_52; + const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_51); + const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_53); + const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_50); + const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_50); + const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_50); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_45); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_45); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_45); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_45); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_42); + const __m256d q_tmp_2_5 = tmp_qloop_43; + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_55); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_45); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_45); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_45); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_49); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_49); + const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_49); + const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_49); + const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_47); + const __m256d q_tmp_2_15 = tmp_qloop_48; + const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_56); + const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_49); + const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_49); + const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_49); + const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_53); + const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_53); + const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_53); + const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_53); + const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_51); + const __m256d q_tmp_2_25 = tmp_qloop_52; + const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_56); + const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_53); + const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_53); + const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_53); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_42); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_42); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_42); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_42); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_6); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_8); + const __m256d q_tmp_3_6 = tmp_qloop_43; + const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_42); + const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_42); + const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_42); + const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_47); + const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_47); + const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_47); + const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_47); + const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_6); + const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_8); + const __m256d q_tmp_3_16 = tmp_qloop_48; + const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_47); + const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_47); + const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_47); + const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_51); + const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_51); + const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_51); + const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_51); + const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_6,tmp_qloop_60); + const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_8); + const __m256d q_tmp_3_26 = tmp_qloop_52; + const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_51); + const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_51); + const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_51); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12); + q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13); + q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14); + q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15); + q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16); + q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17); + q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18); + q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19); + q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20); + q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21); + q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22); + q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23); + q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24); + q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25); + q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26); + q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27); + q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28); + q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12); + q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13); + q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14); + q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15); + q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16); + q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17); + q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18); + q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19); + q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20); + q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21); + q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22); + q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23); + q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24); + q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25); + q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26); + q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27); + q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28); + q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12); + q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13); + q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14); + q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15); + q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16); + q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17); + q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18); + q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19); + q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20); + q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21); + q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22); + q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23); + q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24); + q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25); + q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26); + q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27); + q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28); + q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6); + q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7); + q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8); + q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9); + q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10); + q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11); + q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12); + q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13); + q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14); + q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15); + q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16); + q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17); + q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18); + q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19); + q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20); + q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21); + q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22); + q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23); + q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24); + q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25); + q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26); + q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27); + q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28); + q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_7 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_17 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_27 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_0_12 = 0.0; + real_t q_acc_0_13 = 0.0; + real_t q_acc_0_14 = 0.0; + real_t q_acc_0_15 = 0.0; + real_t q_acc_0_16 = 0.0; + real_t q_acc_0_17 = 0.0; + real_t q_acc_0_18 = 0.0; + real_t q_acc_0_19 = 0.0; + real_t q_acc_0_20 = 0.0; + real_t q_acc_0_21 = 0.0; + real_t q_acc_0_22 = 0.0; + real_t q_acc_0_23 = 0.0; + real_t q_acc_0_24 = 0.0; + real_t q_acc_0_25 = 0.0; + real_t q_acc_0_26 = 0.0; + real_t q_acc_0_27 = 0.0; + real_t q_acc_0_28 = 0.0; + real_t q_acc_0_29 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_1_12 = 0.0; + real_t q_acc_1_13 = 0.0; + real_t q_acc_1_14 = 0.0; + real_t q_acc_1_15 = 0.0; + real_t q_acc_1_16 = 0.0; + real_t q_acc_1_17 = 0.0; + real_t q_acc_1_18 = 0.0; + real_t q_acc_1_19 = 0.0; + real_t q_acc_1_20 = 0.0; + real_t q_acc_1_21 = 0.0; + real_t q_acc_1_22 = 0.0; + real_t q_acc_1_23 = 0.0; + real_t q_acc_1_24 = 0.0; + real_t q_acc_1_25 = 0.0; + real_t q_acc_1_26 = 0.0; + real_t q_acc_1_27 = 0.0; + real_t q_acc_1_28 = 0.0; + real_t q_acc_1_29 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + real_t q_acc_2_12 = 0.0; + real_t q_acc_2_13 = 0.0; + real_t q_acc_2_14 = 0.0; + real_t q_acc_2_15 = 0.0; + real_t q_acc_2_16 = 0.0; + real_t q_acc_2_17 = 0.0; + real_t q_acc_2_18 = 0.0; + real_t q_acc_2_19 = 0.0; + real_t q_acc_2_20 = 0.0; + real_t q_acc_2_21 = 0.0; + real_t q_acc_2_22 = 0.0; + real_t q_acc_2_23 = 0.0; + real_t q_acc_2_24 = 0.0; + real_t q_acc_2_25 = 0.0; + real_t q_acc_2_26 = 0.0; + real_t q_acc_2_27 = 0.0; + real_t q_acc_2_28 = 0.0; + real_t q_acc_2_29 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_3_6 = 0.0; + real_t q_acc_3_7 = 0.0; + real_t q_acc_3_8 = 0.0; + real_t q_acc_3_9 = 0.0; + real_t q_acc_3_10 = 0.0; + real_t q_acc_3_11 = 0.0; + real_t q_acc_3_12 = 0.0; + real_t q_acc_3_13 = 0.0; + real_t q_acc_3_14 = 0.0; + real_t q_acc_3_15 = 0.0; + real_t q_acc_3_16 = 0.0; + real_t q_acc_3_17 = 0.0; + real_t q_acc_3_18 = 0.0; + real_t q_acc_3_19 = 0.0; + real_t q_acc_3_20 = 0.0; + real_t q_acc_3_21 = 0.0; + real_t q_acc_3_22 = 0.0; + real_t q_acc_3_23 = 0.0; + real_t q_acc_3_24 = 0.0; + real_t q_acc_3_25 = 0.0; + real_t q_acc_3_26 = 0.0; + real_t q_acc_3_27 = 0.0; + real_t q_acc_3_28 = 0.0; + real_t q_acc_3_29 = 0.0; + for (int64_t q = 0; q < 5; q += 1) + { + const real_t tmp_qloop_0 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_1 = tmp_qloop_0*2.0; + const real_t tmp_qloop_2 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_3 = tmp_qloop_2*2.0; + const real_t tmp_qloop_4 = (_data_q_p_2[q]*_data_q_p_2[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_7 = tmp_qloop_6*_data_q_p_2[q]; + const real_t tmp_qloop_8 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_9 = tmp_qloop_8*_data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_8*_data_q_p_2[q]; + const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_1 + tmp_qloop_11 + tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_7 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0; + const real_t tmp_qloop_13 = 4.0*_data_q_p_2[q]; + const real_t tmp_qloop_14 = tmp_qloop_13 + tmp_qloop_6; + const real_t tmp_qloop_15 = -rho_dof_8*tmp_qloop_6; + const real_t tmp_qloop_16 = rho_dof_0*(tmp_qloop_14 + tmp_qloop_8 - 3.0); + const real_t tmp_qloop_17 = -rho_dof_7*tmp_qloop_13 + tmp_qloop_16; + const real_t tmp_qloop_18 = rho_dof_1*(tmp_qloop_8 - 1.0) + rho_dof_5*tmp_qloop_13 + rho_dof_6*tmp_qloop_6 + rho_dof_9*(-tmp_qloop_14 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_15 + tmp_qloop_17; + const real_t tmp_qloop_19 = tmp_qloop_8 - 4.0; + const real_t tmp_qloop_20 = -rho_dof_9*tmp_qloop_8; + const real_t tmp_qloop_21 = rho_dof_2*(tmp_qloop_6 - 1.0) + rho_dof_4*tmp_qloop_13 + rho_dof_6*tmp_qloop_8 + rho_dof_8*(-tmp_qloop_13 - tmp_qloop_19 - 8.0*_data_q_p_1[q]) + tmp_qloop_17 + tmp_qloop_20; + const real_t tmp_qloop_22 = rho_dof_3*(tmp_qloop_13 - 1.0) + rho_dof_4*tmp_qloop_6 + rho_dof_5*tmp_qloop_8 + rho_dof_7*(-tmp_qloop_19 - tmp_qloop_6 - 8.0*_data_q_p_2[q]) + tmp_qloop_15 + tmp_qloop_16 + tmp_qloop_20; + const real_t tmp_qloop_23 = jac_affine_inv_0_0_WHITE_UP*tmp_qloop_18 + jac_affine_inv_1_0_WHITE_UP*tmp_qloop_21 + jac_affine_inv_2_0_WHITE_UP*tmp_qloop_22; + const real_t tmp_qloop_24 = tmp_qloop_1 - _data_q_p_0[q]; + const real_t tmp_qloop_25 = tmp_qloop_3 - _data_q_p_1[q]; + const real_t tmp_qloop_26 = tmp_qloop_5 - _data_q_p_2[q]; + const real_t tmp_qloop_27 = -tmp_qloop_10 + tmp_qloop_13 + tmp_qloop_4*-4.0 - tmp_qloop_7; + const real_t tmp_qloop_28 = tmp_qloop_2*4.0; + const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; + const real_t tmp_qloop_30 = tmp_qloop_0*4.0; + const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; + const real_t tmp_qloop_32 = abs_det_jac_affine_WHITE_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; + const real_t tmp_qloop_35 = jac_affine_inv_0_1_WHITE_UP*tmp_qloop_18 + jac_affine_inv_1_1_WHITE_UP*tmp_qloop_21 + jac_affine_inv_2_1_WHITE_UP*tmp_qloop_22; + const real_t tmp_qloop_36 = tmp_qloop_33*tmp_qloop_35; + const real_t tmp_qloop_37 = jac_affine_inv_0_2_WHITE_UP*tmp_qloop_18 + jac_affine_inv_1_2_WHITE_UP*tmp_qloop_21 + jac_affine_inv_2_2_WHITE_UP*tmp_qloop_22; + const real_t tmp_qloop_38 = tmp_qloop_33*tmp_qloop_37; + const real_t tmp_qloop_39 = tmp_qloop_32*_data_q_p_0[q]; + const real_t tmp_qloop_40 = tmp_qloop_23*tmp_qloop_39; + const real_t tmp_qloop_41 = tmp_qloop_32*_data_q_p_2[q]; + const real_t tmp_qloop_42 = tmp_qloop_23*tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_42*tmp_qloop_9; + const real_t tmp_qloop_44 = tmp_qloop_32*_data_q_p_1[q]; + const real_t tmp_qloop_45 = tmp_qloop_23*tmp_qloop_44; + const real_t tmp_qloop_46 = tmp_qloop_35*tmp_qloop_39; + const real_t tmp_qloop_47 = tmp_qloop_35*tmp_qloop_41; + const real_t tmp_qloop_48 = tmp_qloop_47*tmp_qloop_9; + const real_t tmp_qloop_49 = tmp_qloop_35*tmp_qloop_44; + const real_t tmp_qloop_50 = tmp_qloop_37*tmp_qloop_39; + const real_t tmp_qloop_51 = tmp_qloop_37*tmp_qloop_41; + const real_t tmp_qloop_52 = tmp_qloop_51*tmp_qloop_9; + const real_t tmp_qloop_53 = tmp_qloop_37*tmp_qloop_44; + const real_t tmp_qloop_54 = tmp_qloop_23*tmp_qloop_32; + const real_t tmp_qloop_55 = tmp_qloop_2*tmp_qloop_8; + const real_t tmp_qloop_56 = tmp_qloop_32*tmp_qloop_55; + const real_t tmp_qloop_57 = tmp_qloop_4*tmp_qloop_54; + const real_t tmp_qloop_58 = tmp_qloop_32*tmp_qloop_4; + const real_t tmp_qloop_59 = tmp_qloop_35*tmp_qloop_58; + const real_t tmp_qloop_60 = tmp_qloop_37*tmp_qloop_58; + const real_t q_tmp_0_0 = tmp_qloop_12*tmp_qloop_34; + const real_t q_tmp_0_1 = tmp_qloop_24*tmp_qloop_34; + const real_t q_tmp_0_2 = tmp_qloop_25*tmp_qloop_34; + const real_t q_tmp_0_3 = tmp_qloop_26*tmp_qloop_34; + const real_t q_tmp_0_4 = tmp_qloop_34*tmp_qloop_7; + const real_t q_tmp_0_5 = tmp_qloop_10*tmp_qloop_34; + const real_t q_tmp_0_6 = tmp_qloop_34*tmp_qloop_9; + const real_t q_tmp_0_7 = tmp_qloop_27*tmp_qloop_34; + const real_t q_tmp_0_8 = tmp_qloop_29*tmp_qloop_34; + const real_t q_tmp_0_9 = tmp_qloop_31*tmp_qloop_34; + const real_t q_tmp_0_10 = tmp_qloop_12*tmp_qloop_36; + const real_t q_tmp_0_11 = tmp_qloop_24*tmp_qloop_36; + const real_t q_tmp_0_12 = tmp_qloop_25*tmp_qloop_36; + const real_t q_tmp_0_13 = tmp_qloop_26*tmp_qloop_36; + const real_t q_tmp_0_14 = tmp_qloop_36*tmp_qloop_7; + const real_t q_tmp_0_15 = tmp_qloop_10*tmp_qloop_36; + const real_t q_tmp_0_16 = tmp_qloop_36*tmp_qloop_9; + const real_t q_tmp_0_17 = tmp_qloop_27*tmp_qloop_36; + const real_t q_tmp_0_18 = tmp_qloop_29*tmp_qloop_36; + const real_t q_tmp_0_19 = tmp_qloop_31*tmp_qloop_36; + const real_t q_tmp_0_20 = tmp_qloop_12*tmp_qloop_38; + const real_t q_tmp_0_21 = tmp_qloop_24*tmp_qloop_38; + const real_t q_tmp_0_22 = tmp_qloop_25*tmp_qloop_38; + const real_t q_tmp_0_23 = tmp_qloop_26*tmp_qloop_38; + const real_t q_tmp_0_24 = tmp_qloop_38*tmp_qloop_7; + const real_t q_tmp_0_25 = tmp_qloop_10*tmp_qloop_38; + const real_t q_tmp_0_26 = tmp_qloop_38*tmp_qloop_9; + const real_t q_tmp_0_27 = tmp_qloop_27*tmp_qloop_38; + const real_t q_tmp_0_28 = tmp_qloop_29*tmp_qloop_38; + const real_t q_tmp_0_29 = tmp_qloop_31*tmp_qloop_38; + const real_t q_tmp_1_0 = tmp_qloop_12*tmp_qloop_40; + const real_t q_tmp_1_1 = tmp_qloop_24*tmp_qloop_40; + const real_t q_tmp_1_2 = tmp_qloop_25*tmp_qloop_40; + const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_40; + const real_t q_tmp_1_4 = tmp_qloop_43; + const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_42; + const real_t q_tmp_1_6 = tmp_qloop_30*tmp_qloop_45; + const real_t q_tmp_1_7 = tmp_qloop_27*tmp_qloop_40; + const real_t q_tmp_1_8 = tmp_qloop_29*tmp_qloop_40; + const real_t q_tmp_1_9 = tmp_qloop_31*tmp_qloop_40; + const real_t q_tmp_1_10 = tmp_qloop_12*tmp_qloop_46; + const real_t q_tmp_1_11 = tmp_qloop_24*tmp_qloop_46; + const real_t q_tmp_1_12 = tmp_qloop_25*tmp_qloop_46; + const real_t q_tmp_1_13 = tmp_qloop_26*tmp_qloop_46; + const real_t q_tmp_1_14 = tmp_qloop_48; + const real_t q_tmp_1_15 = tmp_qloop_30*tmp_qloop_47; + const real_t q_tmp_1_16 = tmp_qloop_30*tmp_qloop_49; + const real_t q_tmp_1_17 = tmp_qloop_27*tmp_qloop_46; + const real_t q_tmp_1_18 = tmp_qloop_29*tmp_qloop_46; + const real_t q_tmp_1_19 = tmp_qloop_31*tmp_qloop_46; + const real_t q_tmp_1_20 = tmp_qloop_12*tmp_qloop_50; + const real_t q_tmp_1_21 = tmp_qloop_24*tmp_qloop_50; + const real_t q_tmp_1_22 = tmp_qloop_25*tmp_qloop_50; + const real_t q_tmp_1_23 = tmp_qloop_26*tmp_qloop_50; + const real_t q_tmp_1_24 = tmp_qloop_52; + const real_t q_tmp_1_25 = tmp_qloop_30*tmp_qloop_51; + const real_t q_tmp_1_26 = tmp_qloop_30*tmp_qloop_53; + const real_t q_tmp_1_27 = tmp_qloop_27*tmp_qloop_50; + const real_t q_tmp_1_28 = tmp_qloop_29*tmp_qloop_50; + const real_t q_tmp_1_29 = tmp_qloop_31*tmp_qloop_50; + const real_t q_tmp_2_0 = tmp_qloop_12*tmp_qloop_45; + const real_t q_tmp_2_1 = tmp_qloop_24*tmp_qloop_45; + const real_t q_tmp_2_2 = tmp_qloop_25*tmp_qloop_45; + const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_45; + const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_42; + const real_t q_tmp_2_5 = tmp_qloop_43; + const real_t q_tmp_2_6 = tmp_qloop_54*tmp_qloop_55; + const real_t q_tmp_2_7 = tmp_qloop_27*tmp_qloop_45; + const real_t q_tmp_2_8 = tmp_qloop_29*tmp_qloop_45; + const real_t q_tmp_2_9 = tmp_qloop_31*tmp_qloop_45; + const real_t q_tmp_2_10 = tmp_qloop_12*tmp_qloop_49; + const real_t q_tmp_2_11 = tmp_qloop_24*tmp_qloop_49; + const real_t q_tmp_2_12 = tmp_qloop_25*tmp_qloop_49; + const real_t q_tmp_2_13 = tmp_qloop_26*tmp_qloop_49; + const real_t q_tmp_2_14 = tmp_qloop_28*tmp_qloop_47; + const real_t q_tmp_2_15 = tmp_qloop_48; + const real_t q_tmp_2_16 = tmp_qloop_35*tmp_qloop_56; + const real_t q_tmp_2_17 = tmp_qloop_27*tmp_qloop_49; + const real_t q_tmp_2_18 = tmp_qloop_29*tmp_qloop_49; + const real_t q_tmp_2_19 = tmp_qloop_31*tmp_qloop_49; + const real_t q_tmp_2_20 = tmp_qloop_12*tmp_qloop_53; + const real_t q_tmp_2_21 = tmp_qloop_24*tmp_qloop_53; + const real_t q_tmp_2_22 = tmp_qloop_25*tmp_qloop_53; + const real_t q_tmp_2_23 = tmp_qloop_26*tmp_qloop_53; + const real_t q_tmp_2_24 = tmp_qloop_28*tmp_qloop_51; + const real_t q_tmp_2_25 = tmp_qloop_52; + const real_t q_tmp_2_26 = tmp_qloop_37*tmp_qloop_56; + const real_t q_tmp_2_27 = tmp_qloop_27*tmp_qloop_53; + const real_t q_tmp_2_28 = tmp_qloop_29*tmp_qloop_53; + const real_t q_tmp_2_29 = tmp_qloop_31*tmp_qloop_53; + const real_t q_tmp_3_0 = tmp_qloop_12*tmp_qloop_42; + const real_t q_tmp_3_1 = tmp_qloop_24*tmp_qloop_42; + const real_t q_tmp_3_2 = tmp_qloop_25*tmp_qloop_42; + const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_42; + const real_t q_tmp_3_4 = tmp_qloop_57*tmp_qloop_6; + const real_t q_tmp_3_5 = tmp_qloop_57*tmp_qloop_8; + const real_t q_tmp_3_6 = tmp_qloop_43; + const real_t q_tmp_3_7 = tmp_qloop_27*tmp_qloop_42; + const real_t q_tmp_3_8 = tmp_qloop_29*tmp_qloop_42; + const real_t q_tmp_3_9 = tmp_qloop_31*tmp_qloop_42; + const real_t q_tmp_3_10 = tmp_qloop_12*tmp_qloop_47; + const real_t q_tmp_3_11 = tmp_qloop_24*tmp_qloop_47; + const real_t q_tmp_3_12 = tmp_qloop_25*tmp_qloop_47; + const real_t q_tmp_3_13 = tmp_qloop_26*tmp_qloop_47; + const real_t q_tmp_3_14 = tmp_qloop_59*tmp_qloop_6; + const real_t q_tmp_3_15 = tmp_qloop_59*tmp_qloop_8; + const real_t q_tmp_3_16 = tmp_qloop_48; + const real_t q_tmp_3_17 = tmp_qloop_27*tmp_qloop_47; + const real_t q_tmp_3_18 = tmp_qloop_29*tmp_qloop_47; + const real_t q_tmp_3_19 = tmp_qloop_31*tmp_qloop_47; + const real_t q_tmp_3_20 = tmp_qloop_12*tmp_qloop_51; + const real_t q_tmp_3_21 = tmp_qloop_24*tmp_qloop_51; + const real_t q_tmp_3_22 = tmp_qloop_25*tmp_qloop_51; + const real_t q_tmp_3_23 = tmp_qloop_26*tmp_qloop_51; + const real_t q_tmp_3_24 = tmp_qloop_6*tmp_qloop_60; + const real_t q_tmp_3_25 = tmp_qloop_60*tmp_qloop_8; + const real_t q_tmp_3_26 = tmp_qloop_52; + const real_t q_tmp_3_27 = tmp_qloop_27*tmp_qloop_51; + const real_t q_tmp_3_28 = tmp_qloop_29*tmp_qloop_51; + const real_t q_tmp_3_29 = tmp_qloop_31*tmp_qloop_51; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_0_12 = q_acc_0_12 + q_tmp_0_12; + q_acc_0_13 = q_acc_0_13 + q_tmp_0_13; + q_acc_0_14 = q_acc_0_14 + q_tmp_0_14; + q_acc_0_15 = q_acc_0_15 + q_tmp_0_15; + q_acc_0_16 = q_acc_0_16 + q_tmp_0_16; + q_acc_0_17 = q_acc_0_17 + q_tmp_0_17; + q_acc_0_18 = q_acc_0_18 + q_tmp_0_18; + q_acc_0_19 = q_acc_0_19 + q_tmp_0_19; + q_acc_0_20 = q_acc_0_20 + q_tmp_0_20; + q_acc_0_21 = q_acc_0_21 + q_tmp_0_21; + q_acc_0_22 = q_acc_0_22 + q_tmp_0_22; + q_acc_0_23 = q_acc_0_23 + q_tmp_0_23; + q_acc_0_24 = q_acc_0_24 + q_tmp_0_24; + q_acc_0_25 = q_acc_0_25 + q_tmp_0_25; + q_acc_0_26 = q_acc_0_26 + q_tmp_0_26; + q_acc_0_27 = q_acc_0_27 + q_tmp_0_27; + q_acc_0_28 = q_acc_0_28 + q_tmp_0_28; + q_acc_0_29 = q_acc_0_29 + q_tmp_0_29; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_1_12 = q_acc_1_12 + q_tmp_1_12; + q_acc_1_13 = q_acc_1_13 + q_tmp_1_13; + q_acc_1_14 = q_acc_1_14 + q_tmp_1_14; + q_acc_1_15 = q_acc_1_15 + q_tmp_1_15; + q_acc_1_16 = q_acc_1_16 + q_tmp_1_16; + q_acc_1_17 = q_acc_1_17 + q_tmp_1_17; + q_acc_1_18 = q_acc_1_18 + q_tmp_1_18; + q_acc_1_19 = q_acc_1_19 + q_tmp_1_19; + q_acc_1_20 = q_acc_1_20 + q_tmp_1_20; + q_acc_1_21 = q_acc_1_21 + q_tmp_1_21; + q_acc_1_22 = q_acc_1_22 + q_tmp_1_22; + q_acc_1_23 = q_acc_1_23 + q_tmp_1_23; + q_acc_1_24 = q_acc_1_24 + q_tmp_1_24; + q_acc_1_25 = q_acc_1_25 + q_tmp_1_25; + q_acc_1_26 = q_acc_1_26 + q_tmp_1_26; + q_acc_1_27 = q_acc_1_27 + q_tmp_1_27; + q_acc_1_28 = q_acc_1_28 + q_tmp_1_28; + q_acc_1_29 = q_acc_1_29 + q_tmp_1_29; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + q_acc_2_12 = q_acc_2_12 + q_tmp_2_12; + q_acc_2_13 = q_acc_2_13 + q_tmp_2_13; + q_acc_2_14 = q_acc_2_14 + q_tmp_2_14; + q_acc_2_15 = q_acc_2_15 + q_tmp_2_15; + q_acc_2_16 = q_acc_2_16 + q_tmp_2_16; + q_acc_2_17 = q_acc_2_17 + q_tmp_2_17; + q_acc_2_18 = q_acc_2_18 + q_tmp_2_18; + q_acc_2_19 = q_acc_2_19 + q_tmp_2_19; + q_acc_2_20 = q_acc_2_20 + q_tmp_2_20; + q_acc_2_21 = q_acc_2_21 + q_tmp_2_21; + q_acc_2_22 = q_acc_2_22 + q_tmp_2_22; + q_acc_2_23 = q_acc_2_23 + q_tmp_2_23; + q_acc_2_24 = q_acc_2_24 + q_tmp_2_24; + q_acc_2_25 = q_acc_2_25 + q_tmp_2_25; + q_acc_2_26 = q_acc_2_26 + q_tmp_2_26; + q_acc_2_27 = q_acc_2_27 + q_tmp_2_27; + q_acc_2_28 = q_acc_2_28 + q_tmp_2_28; + q_acc_2_29 = q_acc_2_29 + q_tmp_2_29; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_3_6 = q_acc_3_6 + q_tmp_3_6; + q_acc_3_7 = q_acc_3_7 + q_tmp_3_7; + q_acc_3_8 = q_acc_3_8 + q_tmp_3_8; + q_acc_3_9 = q_acc_3_9 + q_tmp_3_9; + q_acc_3_10 = q_acc_3_10 + q_tmp_3_10; + q_acc_3_11 = q_acc_3_11 + q_tmp_3_11; + q_acc_3_12 = q_acc_3_12 + q_tmp_3_12; + q_acc_3_13 = q_acc_3_13 + q_tmp_3_13; + q_acc_3_14 = q_acc_3_14 + q_tmp_3_14; + q_acc_3_15 = q_acc_3_15 + q_tmp_3_15; + q_acc_3_16 = q_acc_3_16 + q_tmp_3_16; + q_acc_3_17 = q_acc_3_17 + q_tmp_3_17; + q_acc_3_18 = q_acc_3_18 + q_tmp_3_18; + q_acc_3_19 = q_acc_3_19 + q_tmp_3_19; + q_acc_3_20 = q_acc_3_20 + q_tmp_3_20; + q_acc_3_21 = q_acc_3_21 + q_tmp_3_21; + q_acc_3_22 = q_acc_3_22 + q_tmp_3_22; + q_acc_3_23 = q_acc_3_23 + q_tmp_3_23; + q_acc_3_24 = q_acc_3_24 + q_tmp_3_24; + q_acc_3_25 = q_acc_3_25 + q_tmp_3_25; + q_acc_3_26 = q_acc_3_26 + q_tmp_3_26; + q_acc_3_27 = q_acc_3_27 + q_tmp_3_27; + q_acc_3_28 = q_acc_3_28 + q_tmp_3_28; + q_acc_3_29 = q_acc_3_29 + q_tmp_3_29; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + } + } + } + const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; + const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; + const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; + const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN; + const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN; + const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN; + const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; + const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; + const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; + const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN; + const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN; + const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN; + const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN; + const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN; + const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN; + const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN; + const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN; + const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN; + const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN; + const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN; + const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN; + const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN; + const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN; + const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN; + const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN; + const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN; + const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_18_WHITE_DOWN = jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN; + const real_t tmp_coords_jac_19_WHITE_DOWN = 1.0 / (tmp_coords_jac_18_WHITE_DOWN); + const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN); + const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN); + const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN); + const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); + const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); + const real_t abs_det_jac_affine_WHITE_DOWN = abs(tmp_coords_jac_18_WHITE_DOWN); + { + /* CellType.WHITE_DOWN */ + for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) + for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]); + const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]); + const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]); + const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 5; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_6 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_7 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_8 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_11 = _mm256_add_pd(tmp_qloop_10,tmp_qloop_9); + const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_11),tmp_qloop_3),tmp_qloop_5),tmp_qloop_7); + const __m256d tmp_qloop_13 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_14 = _mm256_add_pd(tmp_qloop_13,tmp_qloop_6); + const __m256d tmp_qloop_15 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_6),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_16 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_14),tmp_qloop_8)); + const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_13),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_8)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_6)),tmp_qloop_15),tmp_qloop_17); + const __m256d tmp_qloop_19 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_8); + const __m256d tmp_qloop_20 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_6)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_8)),tmp_qloop_17),tmp_qloop_20); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_13)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_6)),_mm256_mul_pd(rho_dof_5,tmp_qloop_8)),tmp_qloop_15),tmp_qloop_16),tmp_qloop_20); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN,jac_affine_inv_0_0_WHITE_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN,jac_affine_inv_1_0_WHITE_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN,jac_affine_inv_2_0_WHITE_DOWN))); + const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_1); + const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_3); + const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_5); + const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_13); + const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_29 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_6); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_11,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8); + const __m256d tmp_qloop_32 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_12),_mm256_mul_pd(rho_dof_1,tmp_qloop_24)),_mm256_mul_pd(rho_dof_2,tmp_qloop_25)),_mm256_mul_pd(rho_dof_3,tmp_qloop_26)),_mm256_mul_pd(rho_dof_4,tmp_qloop_7)),_mm256_mul_pd(rho_dof_5,tmp_qloop_10)),_mm256_mul_pd(rho_dof_6,tmp_qloop_9)),_mm256_mul_pd(rho_dof_7,tmp_qloop_27)),_mm256_mul_pd(rho_dof_8,tmp_qloop_29)),_mm256_mul_pd(rho_dof_9,tmp_qloop_31))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_WHITE_DOWN,abs_det_jac_affine_WHITE_DOWN,abs_det_jac_affine_WHITE_DOWN,abs_det_jac_affine_WHITE_DOWN)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_33); + const __m256d tmp_qloop_35 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN,jac_affine_inv_0_1_WHITE_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN,jac_affine_inv_1_1_WHITE_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN,jac_affine_inv_2_1_WHITE_DOWN))); + const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_35); + const __m256d tmp_qloop_37 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN,jac_affine_inv_0_2_WHITE_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN,jac_affine_inv_1_2_WHITE_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN,jac_affine_inv_2_2_WHITE_DOWN))); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_37); + const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_39); + const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_41); + const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_9); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_44); + const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_39); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_41); + const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_47,tmp_qloop_9); + const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_44); + const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_39); + const __m256d tmp_qloop_51 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_41); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_9); + const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_44); + const __m256d tmp_qloop_54 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_32); + const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_8); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_55); + const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_54); + const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_4); + const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_58); + const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_58); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_34); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_34); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_34); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_34); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_7); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_34); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_9); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_34); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_34); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_34); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_36); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_36); + const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_36); + const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_36); + const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_7); + const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_36); + const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_9); + const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_36); + const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_36); + const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_36); + const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_38); + const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_38); + const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_38); + const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_38); + const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_7); + const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_38); + const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_9); + const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_38); + const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_38); + const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_38); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_40); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_40); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_40); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_40); + const __m256d q_tmp_1_4 = tmp_qloop_43; + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_42); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_45); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_40); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_40); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_40); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_46); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_46); + const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_46); + const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_46); + const __m256d q_tmp_1_14 = tmp_qloop_48; + const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_47); + const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_49); + const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_46); + const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_46); + const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_46); + const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_50); + const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_50); + const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50); + const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_50); + const __m256d q_tmp_1_24 = tmp_qloop_52; + const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_51); + const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_53); + const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_50); + const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_50); + const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_50); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_45); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_45); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_45); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_45); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_42); + const __m256d q_tmp_2_5 = tmp_qloop_43; + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_55); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_45); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_45); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_45); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_49); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_49); + const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_49); + const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_49); + const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_47); + const __m256d q_tmp_2_15 = tmp_qloop_48; + const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_56); + const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_49); + const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_49); + const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_49); + const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_53); + const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_53); + const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_53); + const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_53); + const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_51); + const __m256d q_tmp_2_25 = tmp_qloop_52; + const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_56); + const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_53); + const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_53); + const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_53); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_42); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_42); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_42); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_42); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_6); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_8); + const __m256d q_tmp_3_6 = tmp_qloop_43; + const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_42); + const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_42); + const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_42); + const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_47); + const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_47); + const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_47); + const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_47); + const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_6); + const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_8); + const __m256d q_tmp_3_16 = tmp_qloop_48; + const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_47); + const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_47); + const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_47); + const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_51); + const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_51); + const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_51); + const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_51); + const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_6,tmp_qloop_60); + const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_8); + const __m256d q_tmp_3_26 = tmp_qloop_52; + const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_51); + const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_51); + const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_51); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12); + q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13); + q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14); + q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15); + q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16); + q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17); + q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18); + q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19); + q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20); + q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21); + q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22); + q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23); + q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24); + q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25); + q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26); + q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27); + q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28); + q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12); + q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13); + q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14); + q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15); + q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16); + q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17); + q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18); + q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19); + q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20); + q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21); + q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22); + q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23); + q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24); + q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25); + q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26); + q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27); + q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28); + q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12); + q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13); + q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14); + q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15); + q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16); + q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17); + q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18); + q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19); + q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20); + q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21); + q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22); + q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23); + q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24); + q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25); + q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26); + q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27); + q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28); + q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6); + q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7); + q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8); + q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9); + q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10); + q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11); + q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12); + q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13); + q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14); + q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15); + q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16); + q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17); + q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18); + q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19); + q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20); + q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21); + q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22); + q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23); + q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24); + q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25); + q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26); + q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27); + q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28); + q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]; + const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_7 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_8 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_14 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]; + const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_17 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_18 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_24 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]; + const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_27 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_28 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6)) + 1]; + const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_0_12 = 0.0; + real_t q_acc_0_13 = 0.0; + real_t q_acc_0_14 = 0.0; + real_t q_acc_0_15 = 0.0; + real_t q_acc_0_16 = 0.0; + real_t q_acc_0_17 = 0.0; + real_t q_acc_0_18 = 0.0; + real_t q_acc_0_19 = 0.0; + real_t q_acc_0_20 = 0.0; + real_t q_acc_0_21 = 0.0; + real_t q_acc_0_22 = 0.0; + real_t q_acc_0_23 = 0.0; + real_t q_acc_0_24 = 0.0; + real_t q_acc_0_25 = 0.0; + real_t q_acc_0_26 = 0.0; + real_t q_acc_0_27 = 0.0; + real_t q_acc_0_28 = 0.0; + real_t q_acc_0_29 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_1_12 = 0.0; + real_t q_acc_1_13 = 0.0; + real_t q_acc_1_14 = 0.0; + real_t q_acc_1_15 = 0.0; + real_t q_acc_1_16 = 0.0; + real_t q_acc_1_17 = 0.0; + real_t q_acc_1_18 = 0.0; + real_t q_acc_1_19 = 0.0; + real_t q_acc_1_20 = 0.0; + real_t q_acc_1_21 = 0.0; + real_t q_acc_1_22 = 0.0; + real_t q_acc_1_23 = 0.0; + real_t q_acc_1_24 = 0.0; + real_t q_acc_1_25 = 0.0; + real_t q_acc_1_26 = 0.0; + real_t q_acc_1_27 = 0.0; + real_t q_acc_1_28 = 0.0; + real_t q_acc_1_29 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + real_t q_acc_2_12 = 0.0; + real_t q_acc_2_13 = 0.0; + real_t q_acc_2_14 = 0.0; + real_t q_acc_2_15 = 0.0; + real_t q_acc_2_16 = 0.0; + real_t q_acc_2_17 = 0.0; + real_t q_acc_2_18 = 0.0; + real_t q_acc_2_19 = 0.0; + real_t q_acc_2_20 = 0.0; + real_t q_acc_2_21 = 0.0; + real_t q_acc_2_22 = 0.0; + real_t q_acc_2_23 = 0.0; + real_t q_acc_2_24 = 0.0; + real_t q_acc_2_25 = 0.0; + real_t q_acc_2_26 = 0.0; + real_t q_acc_2_27 = 0.0; + real_t q_acc_2_28 = 0.0; + real_t q_acc_2_29 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_3_6 = 0.0; + real_t q_acc_3_7 = 0.0; + real_t q_acc_3_8 = 0.0; + real_t q_acc_3_9 = 0.0; + real_t q_acc_3_10 = 0.0; + real_t q_acc_3_11 = 0.0; + real_t q_acc_3_12 = 0.0; + real_t q_acc_3_13 = 0.0; + real_t q_acc_3_14 = 0.0; + real_t q_acc_3_15 = 0.0; + real_t q_acc_3_16 = 0.0; + real_t q_acc_3_17 = 0.0; + real_t q_acc_3_18 = 0.0; + real_t q_acc_3_19 = 0.0; + real_t q_acc_3_20 = 0.0; + real_t q_acc_3_21 = 0.0; + real_t q_acc_3_22 = 0.0; + real_t q_acc_3_23 = 0.0; + real_t q_acc_3_24 = 0.0; + real_t q_acc_3_25 = 0.0; + real_t q_acc_3_26 = 0.0; + real_t q_acc_3_27 = 0.0; + real_t q_acc_3_28 = 0.0; + real_t q_acc_3_29 = 0.0; + for (int64_t q = 0; q < 5; q += 1) + { + const real_t tmp_qloop_0 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_1 = tmp_qloop_0*2.0; + const real_t tmp_qloop_2 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_3 = tmp_qloop_2*2.0; + const real_t tmp_qloop_4 = (_data_q_p_2[q]*_data_q_p_2[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_7 = tmp_qloop_6*_data_q_p_2[q]; + const real_t tmp_qloop_8 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_9 = tmp_qloop_8*_data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_8*_data_q_p_2[q]; + const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_1 + tmp_qloop_11 + tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_7 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0; + const real_t tmp_qloop_13 = 4.0*_data_q_p_2[q]; + const real_t tmp_qloop_14 = tmp_qloop_13 + tmp_qloop_6; + const real_t tmp_qloop_15 = -rho_dof_8*tmp_qloop_6; + const real_t tmp_qloop_16 = rho_dof_0*(tmp_qloop_14 + tmp_qloop_8 - 3.0); + const real_t tmp_qloop_17 = -rho_dof_7*tmp_qloop_13 + tmp_qloop_16; + const real_t tmp_qloop_18 = rho_dof_1*(tmp_qloop_8 - 1.0) + rho_dof_5*tmp_qloop_13 + rho_dof_6*tmp_qloop_6 + rho_dof_9*(-tmp_qloop_14 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_15 + tmp_qloop_17; + const real_t tmp_qloop_19 = tmp_qloop_8 - 4.0; + const real_t tmp_qloop_20 = -rho_dof_9*tmp_qloop_8; + const real_t tmp_qloop_21 = rho_dof_2*(tmp_qloop_6 - 1.0) + rho_dof_4*tmp_qloop_13 + rho_dof_6*tmp_qloop_8 + rho_dof_8*(-tmp_qloop_13 - tmp_qloop_19 - 8.0*_data_q_p_1[q]) + tmp_qloop_17 + tmp_qloop_20; + const real_t tmp_qloop_22 = rho_dof_3*(tmp_qloop_13 - 1.0) + rho_dof_4*tmp_qloop_6 + rho_dof_5*tmp_qloop_8 + rho_dof_7*(-tmp_qloop_19 - tmp_qloop_6 - 8.0*_data_q_p_2[q]) + tmp_qloop_15 + tmp_qloop_16 + tmp_qloop_20; + const real_t tmp_qloop_23 = jac_affine_inv_0_0_WHITE_DOWN*tmp_qloop_18 + jac_affine_inv_1_0_WHITE_DOWN*tmp_qloop_21 + jac_affine_inv_2_0_WHITE_DOWN*tmp_qloop_22; + const real_t tmp_qloop_24 = tmp_qloop_1 - _data_q_p_0[q]; + const real_t tmp_qloop_25 = tmp_qloop_3 - _data_q_p_1[q]; + const real_t tmp_qloop_26 = tmp_qloop_5 - _data_q_p_2[q]; + const real_t tmp_qloop_27 = -tmp_qloop_10 + tmp_qloop_13 + tmp_qloop_4*-4.0 - tmp_qloop_7; + const real_t tmp_qloop_28 = tmp_qloop_2*4.0; + const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; + const real_t tmp_qloop_30 = tmp_qloop_0*4.0; + const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; + const real_t tmp_qloop_32 = abs_det_jac_affine_WHITE_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; + const real_t tmp_qloop_35 = jac_affine_inv_0_1_WHITE_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_WHITE_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_WHITE_DOWN*tmp_qloop_22; + const real_t tmp_qloop_36 = tmp_qloop_33*tmp_qloop_35; + const real_t tmp_qloop_37 = jac_affine_inv_0_2_WHITE_DOWN*tmp_qloop_18 + jac_affine_inv_1_2_WHITE_DOWN*tmp_qloop_21 + jac_affine_inv_2_2_WHITE_DOWN*tmp_qloop_22; + const real_t tmp_qloop_38 = tmp_qloop_33*tmp_qloop_37; + const real_t tmp_qloop_39 = tmp_qloop_32*_data_q_p_0[q]; + const real_t tmp_qloop_40 = tmp_qloop_23*tmp_qloop_39; + const real_t tmp_qloop_41 = tmp_qloop_32*_data_q_p_2[q]; + const real_t tmp_qloop_42 = tmp_qloop_23*tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_42*tmp_qloop_9; + const real_t tmp_qloop_44 = tmp_qloop_32*_data_q_p_1[q]; + const real_t tmp_qloop_45 = tmp_qloop_23*tmp_qloop_44; + const real_t tmp_qloop_46 = tmp_qloop_35*tmp_qloop_39; + const real_t tmp_qloop_47 = tmp_qloop_35*tmp_qloop_41; + const real_t tmp_qloop_48 = tmp_qloop_47*tmp_qloop_9; + const real_t tmp_qloop_49 = tmp_qloop_35*tmp_qloop_44; + const real_t tmp_qloop_50 = tmp_qloop_37*tmp_qloop_39; + const real_t tmp_qloop_51 = tmp_qloop_37*tmp_qloop_41; + const real_t tmp_qloop_52 = tmp_qloop_51*tmp_qloop_9; + const real_t tmp_qloop_53 = tmp_qloop_37*tmp_qloop_44; + const real_t tmp_qloop_54 = tmp_qloop_23*tmp_qloop_32; + const real_t tmp_qloop_55 = tmp_qloop_2*tmp_qloop_8; + const real_t tmp_qloop_56 = tmp_qloop_32*tmp_qloop_55; + const real_t tmp_qloop_57 = tmp_qloop_4*tmp_qloop_54; + const real_t tmp_qloop_58 = tmp_qloop_32*tmp_qloop_4; + const real_t tmp_qloop_59 = tmp_qloop_35*tmp_qloop_58; + const real_t tmp_qloop_60 = tmp_qloop_37*tmp_qloop_58; + const real_t q_tmp_0_0 = tmp_qloop_12*tmp_qloop_34; + const real_t q_tmp_0_1 = tmp_qloop_24*tmp_qloop_34; + const real_t q_tmp_0_2 = tmp_qloop_25*tmp_qloop_34; + const real_t q_tmp_0_3 = tmp_qloop_26*tmp_qloop_34; + const real_t q_tmp_0_4 = tmp_qloop_34*tmp_qloop_7; + const real_t q_tmp_0_5 = tmp_qloop_10*tmp_qloop_34; + const real_t q_tmp_0_6 = tmp_qloop_34*tmp_qloop_9; + const real_t q_tmp_0_7 = tmp_qloop_27*tmp_qloop_34; + const real_t q_tmp_0_8 = tmp_qloop_29*tmp_qloop_34; + const real_t q_tmp_0_9 = tmp_qloop_31*tmp_qloop_34; + const real_t q_tmp_0_10 = tmp_qloop_12*tmp_qloop_36; + const real_t q_tmp_0_11 = tmp_qloop_24*tmp_qloop_36; + const real_t q_tmp_0_12 = tmp_qloop_25*tmp_qloop_36; + const real_t q_tmp_0_13 = tmp_qloop_26*tmp_qloop_36; + const real_t q_tmp_0_14 = tmp_qloop_36*tmp_qloop_7; + const real_t q_tmp_0_15 = tmp_qloop_10*tmp_qloop_36; + const real_t q_tmp_0_16 = tmp_qloop_36*tmp_qloop_9; + const real_t q_tmp_0_17 = tmp_qloop_27*tmp_qloop_36; + const real_t q_tmp_0_18 = tmp_qloop_29*tmp_qloop_36; + const real_t q_tmp_0_19 = tmp_qloop_31*tmp_qloop_36; + const real_t q_tmp_0_20 = tmp_qloop_12*tmp_qloop_38; + const real_t q_tmp_0_21 = tmp_qloop_24*tmp_qloop_38; + const real_t q_tmp_0_22 = tmp_qloop_25*tmp_qloop_38; + const real_t q_tmp_0_23 = tmp_qloop_26*tmp_qloop_38; + const real_t q_tmp_0_24 = tmp_qloop_38*tmp_qloop_7; + const real_t q_tmp_0_25 = tmp_qloop_10*tmp_qloop_38; + const real_t q_tmp_0_26 = tmp_qloop_38*tmp_qloop_9; + const real_t q_tmp_0_27 = tmp_qloop_27*tmp_qloop_38; + const real_t q_tmp_0_28 = tmp_qloop_29*tmp_qloop_38; + const real_t q_tmp_0_29 = tmp_qloop_31*tmp_qloop_38; + const real_t q_tmp_1_0 = tmp_qloop_12*tmp_qloop_40; + const real_t q_tmp_1_1 = tmp_qloop_24*tmp_qloop_40; + const real_t q_tmp_1_2 = tmp_qloop_25*tmp_qloop_40; + const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_40; + const real_t q_tmp_1_4 = tmp_qloop_43; + const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_42; + const real_t q_tmp_1_6 = tmp_qloop_30*tmp_qloop_45; + const real_t q_tmp_1_7 = tmp_qloop_27*tmp_qloop_40; + const real_t q_tmp_1_8 = tmp_qloop_29*tmp_qloop_40; + const real_t q_tmp_1_9 = tmp_qloop_31*tmp_qloop_40; + const real_t q_tmp_1_10 = tmp_qloop_12*tmp_qloop_46; + const real_t q_tmp_1_11 = tmp_qloop_24*tmp_qloop_46; + const real_t q_tmp_1_12 = tmp_qloop_25*tmp_qloop_46; + const real_t q_tmp_1_13 = tmp_qloop_26*tmp_qloop_46; + const real_t q_tmp_1_14 = tmp_qloop_48; + const real_t q_tmp_1_15 = tmp_qloop_30*tmp_qloop_47; + const real_t q_tmp_1_16 = tmp_qloop_30*tmp_qloop_49; + const real_t q_tmp_1_17 = tmp_qloop_27*tmp_qloop_46; + const real_t q_tmp_1_18 = tmp_qloop_29*tmp_qloop_46; + const real_t q_tmp_1_19 = tmp_qloop_31*tmp_qloop_46; + const real_t q_tmp_1_20 = tmp_qloop_12*tmp_qloop_50; + const real_t q_tmp_1_21 = tmp_qloop_24*tmp_qloop_50; + const real_t q_tmp_1_22 = tmp_qloop_25*tmp_qloop_50; + const real_t q_tmp_1_23 = tmp_qloop_26*tmp_qloop_50; + const real_t q_tmp_1_24 = tmp_qloop_52; + const real_t q_tmp_1_25 = tmp_qloop_30*tmp_qloop_51; + const real_t q_tmp_1_26 = tmp_qloop_30*tmp_qloop_53; + const real_t q_tmp_1_27 = tmp_qloop_27*tmp_qloop_50; + const real_t q_tmp_1_28 = tmp_qloop_29*tmp_qloop_50; + const real_t q_tmp_1_29 = tmp_qloop_31*tmp_qloop_50; + const real_t q_tmp_2_0 = tmp_qloop_12*tmp_qloop_45; + const real_t q_tmp_2_1 = tmp_qloop_24*tmp_qloop_45; + const real_t q_tmp_2_2 = tmp_qloop_25*tmp_qloop_45; + const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_45; + const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_42; + const real_t q_tmp_2_5 = tmp_qloop_43; + const real_t q_tmp_2_6 = tmp_qloop_54*tmp_qloop_55; + const real_t q_tmp_2_7 = tmp_qloop_27*tmp_qloop_45; + const real_t q_tmp_2_8 = tmp_qloop_29*tmp_qloop_45; + const real_t q_tmp_2_9 = tmp_qloop_31*tmp_qloop_45; + const real_t q_tmp_2_10 = tmp_qloop_12*tmp_qloop_49; + const real_t q_tmp_2_11 = tmp_qloop_24*tmp_qloop_49; + const real_t q_tmp_2_12 = tmp_qloop_25*tmp_qloop_49; + const real_t q_tmp_2_13 = tmp_qloop_26*tmp_qloop_49; + const real_t q_tmp_2_14 = tmp_qloop_28*tmp_qloop_47; + const real_t q_tmp_2_15 = tmp_qloop_48; + const real_t q_tmp_2_16 = tmp_qloop_35*tmp_qloop_56; + const real_t q_tmp_2_17 = tmp_qloop_27*tmp_qloop_49; + const real_t q_tmp_2_18 = tmp_qloop_29*tmp_qloop_49; + const real_t q_tmp_2_19 = tmp_qloop_31*tmp_qloop_49; + const real_t q_tmp_2_20 = tmp_qloop_12*tmp_qloop_53; + const real_t q_tmp_2_21 = tmp_qloop_24*tmp_qloop_53; + const real_t q_tmp_2_22 = tmp_qloop_25*tmp_qloop_53; + const real_t q_tmp_2_23 = tmp_qloop_26*tmp_qloop_53; + const real_t q_tmp_2_24 = tmp_qloop_28*tmp_qloop_51; + const real_t q_tmp_2_25 = tmp_qloop_52; + const real_t q_tmp_2_26 = tmp_qloop_37*tmp_qloop_56; + const real_t q_tmp_2_27 = tmp_qloop_27*tmp_qloop_53; + const real_t q_tmp_2_28 = tmp_qloop_29*tmp_qloop_53; + const real_t q_tmp_2_29 = tmp_qloop_31*tmp_qloop_53; + const real_t q_tmp_3_0 = tmp_qloop_12*tmp_qloop_42; + const real_t q_tmp_3_1 = tmp_qloop_24*tmp_qloop_42; + const real_t q_tmp_3_2 = tmp_qloop_25*tmp_qloop_42; + const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_42; + const real_t q_tmp_3_4 = tmp_qloop_57*tmp_qloop_6; + const real_t q_tmp_3_5 = tmp_qloop_57*tmp_qloop_8; + const real_t q_tmp_3_6 = tmp_qloop_43; + const real_t q_tmp_3_7 = tmp_qloop_27*tmp_qloop_42; + const real_t q_tmp_3_8 = tmp_qloop_29*tmp_qloop_42; + const real_t q_tmp_3_9 = tmp_qloop_31*tmp_qloop_42; + const real_t q_tmp_3_10 = tmp_qloop_12*tmp_qloop_47; + const real_t q_tmp_3_11 = tmp_qloop_24*tmp_qloop_47; + const real_t q_tmp_3_12 = tmp_qloop_25*tmp_qloop_47; + const real_t q_tmp_3_13 = tmp_qloop_26*tmp_qloop_47; + const real_t q_tmp_3_14 = tmp_qloop_59*tmp_qloop_6; + const real_t q_tmp_3_15 = tmp_qloop_59*tmp_qloop_8; + const real_t q_tmp_3_16 = tmp_qloop_48; + const real_t q_tmp_3_17 = tmp_qloop_27*tmp_qloop_47; + const real_t q_tmp_3_18 = tmp_qloop_29*tmp_qloop_47; + const real_t q_tmp_3_19 = tmp_qloop_31*tmp_qloop_47; + const real_t q_tmp_3_20 = tmp_qloop_12*tmp_qloop_51; + const real_t q_tmp_3_21 = tmp_qloop_24*tmp_qloop_51; + const real_t q_tmp_3_22 = tmp_qloop_25*tmp_qloop_51; + const real_t q_tmp_3_23 = tmp_qloop_26*tmp_qloop_51; + const real_t q_tmp_3_24 = tmp_qloop_6*tmp_qloop_60; + const real_t q_tmp_3_25 = tmp_qloop_60*tmp_qloop_8; + const real_t q_tmp_3_26 = tmp_qloop_52; + const real_t q_tmp_3_27 = tmp_qloop_27*tmp_qloop_51; + const real_t q_tmp_3_28 = tmp_qloop_29*tmp_qloop_51; + const real_t q_tmp_3_29 = tmp_qloop_31*tmp_qloop_51; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_0_12 = q_acc_0_12 + q_tmp_0_12; + q_acc_0_13 = q_acc_0_13 + q_tmp_0_13; + q_acc_0_14 = q_acc_0_14 + q_tmp_0_14; + q_acc_0_15 = q_acc_0_15 + q_tmp_0_15; + q_acc_0_16 = q_acc_0_16 + q_tmp_0_16; + q_acc_0_17 = q_acc_0_17 + q_tmp_0_17; + q_acc_0_18 = q_acc_0_18 + q_tmp_0_18; + q_acc_0_19 = q_acc_0_19 + q_tmp_0_19; + q_acc_0_20 = q_acc_0_20 + q_tmp_0_20; + q_acc_0_21 = q_acc_0_21 + q_tmp_0_21; + q_acc_0_22 = q_acc_0_22 + q_tmp_0_22; + q_acc_0_23 = q_acc_0_23 + q_tmp_0_23; + q_acc_0_24 = q_acc_0_24 + q_tmp_0_24; + q_acc_0_25 = q_acc_0_25 + q_tmp_0_25; + q_acc_0_26 = q_acc_0_26 + q_tmp_0_26; + q_acc_0_27 = q_acc_0_27 + q_tmp_0_27; + q_acc_0_28 = q_acc_0_28 + q_tmp_0_28; + q_acc_0_29 = q_acc_0_29 + q_tmp_0_29; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_1_12 = q_acc_1_12 + q_tmp_1_12; + q_acc_1_13 = q_acc_1_13 + q_tmp_1_13; + q_acc_1_14 = q_acc_1_14 + q_tmp_1_14; + q_acc_1_15 = q_acc_1_15 + q_tmp_1_15; + q_acc_1_16 = q_acc_1_16 + q_tmp_1_16; + q_acc_1_17 = q_acc_1_17 + q_tmp_1_17; + q_acc_1_18 = q_acc_1_18 + q_tmp_1_18; + q_acc_1_19 = q_acc_1_19 + q_tmp_1_19; + q_acc_1_20 = q_acc_1_20 + q_tmp_1_20; + q_acc_1_21 = q_acc_1_21 + q_tmp_1_21; + q_acc_1_22 = q_acc_1_22 + q_tmp_1_22; + q_acc_1_23 = q_acc_1_23 + q_tmp_1_23; + q_acc_1_24 = q_acc_1_24 + q_tmp_1_24; + q_acc_1_25 = q_acc_1_25 + q_tmp_1_25; + q_acc_1_26 = q_acc_1_26 + q_tmp_1_26; + q_acc_1_27 = q_acc_1_27 + q_tmp_1_27; + q_acc_1_28 = q_acc_1_28 + q_tmp_1_28; + q_acc_1_29 = q_acc_1_29 + q_tmp_1_29; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + q_acc_2_12 = q_acc_2_12 + q_tmp_2_12; + q_acc_2_13 = q_acc_2_13 + q_tmp_2_13; + q_acc_2_14 = q_acc_2_14 + q_tmp_2_14; + q_acc_2_15 = q_acc_2_15 + q_tmp_2_15; + q_acc_2_16 = q_acc_2_16 + q_tmp_2_16; + q_acc_2_17 = q_acc_2_17 + q_tmp_2_17; + q_acc_2_18 = q_acc_2_18 + q_tmp_2_18; + q_acc_2_19 = q_acc_2_19 + q_tmp_2_19; + q_acc_2_20 = q_acc_2_20 + q_tmp_2_20; + q_acc_2_21 = q_acc_2_21 + q_tmp_2_21; + q_acc_2_22 = q_acc_2_22 + q_tmp_2_22; + q_acc_2_23 = q_acc_2_23 + q_tmp_2_23; + q_acc_2_24 = q_acc_2_24 + q_tmp_2_24; + q_acc_2_25 = q_acc_2_25 + q_tmp_2_25; + q_acc_2_26 = q_acc_2_26 + q_tmp_2_26; + q_acc_2_27 = q_acc_2_27 + q_tmp_2_27; + q_acc_2_28 = q_acc_2_28 + q_tmp_2_28; + q_acc_2_29 = q_acc_2_29 + q_tmp_2_29; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_3_6 = q_acc_3_6 + q_tmp_3_6; + q_acc_3_7 = q_acc_3_7 + q_tmp_3_7; + q_acc_3_8 = q_acc_3_8 + q_tmp_3_8; + q_acc_3_9 = q_acc_3_9 + q_tmp_3_9; + q_acc_3_10 = q_acc_3_10 + q_tmp_3_10; + q_acc_3_11 = q_acc_3_11 + q_tmp_3_11; + q_acc_3_12 = q_acc_3_12 + q_tmp_3_12; + q_acc_3_13 = q_acc_3_13 + q_tmp_3_13; + q_acc_3_14 = q_acc_3_14 + q_tmp_3_14; + q_acc_3_15 = q_acc_3_15 + q_tmp_3_15; + q_acc_3_16 = q_acc_3_16 + q_tmp_3_16; + q_acc_3_17 = q_acc_3_17 + q_tmp_3_17; + q_acc_3_18 = q_acc_3_18 + q_tmp_3_18; + q_acc_3_19 = q_acc_3_19 + q_tmp_3_19; + q_acc_3_20 = q_acc_3_20 + q_tmp_3_20; + q_acc_3_21 = q_acc_3_21 + q_tmp_3_21; + q_acc_3_22 = q_acc_3_22 + q_tmp_3_22; + q_acc_3_23 = q_acc_3_23 + q_tmp_3_23; + q_acc_3_24 = q_acc_3_24 + q_tmp_3_24; + q_acc_3_25 = q_acc_3_25 + q_tmp_3_25; + q_acc_3_26 = q_acc_3_26 + q_tmp_3_26; + q_acc_3_27 = q_acc_3_27 + q_tmp_3_27; + q_acc_3_28 = q_acc_3_28 + q_tmp_3_28; + q_acc_3_29 = q_acc_3_29 + q_tmp_3_29; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_0 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + } + } + } + const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP; + const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP; + const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP; + const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP; + const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP; + const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP; + const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP; + const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP; + const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP; + const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP; + const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP; + const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP; + const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP; + const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP; + const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP; + const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP; + const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP; + const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP; + const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP; + const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP; + const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP; + const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP; + const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP; + const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP; + const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP; + const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP; + const real_t tmp_coords_jac_12_BLUE_UP = jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP; + const real_t tmp_coords_jac_13_BLUE_UP = 1.0 / (tmp_coords_jac_12_BLUE_UP); + const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP); + const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP); + const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP); + const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP); + const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP); + const real_t abs_det_jac_affine_BLUE_UP = abs(tmp_coords_jac_12_BLUE_UP); + { + /* CellType.BLUE_UP */ + for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) + for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 5; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_6 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_7 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_8 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_11 = _mm256_add_pd(tmp_qloop_10,tmp_qloop_9); + const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_11),tmp_qloop_3),tmp_qloop_5),tmp_qloop_7); + const __m256d tmp_qloop_13 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_14 = _mm256_add_pd(tmp_qloop_13,tmp_qloop_6); + const __m256d tmp_qloop_15 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_6),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_16 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_14),tmp_qloop_8)); + const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_13),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_8)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_6)),tmp_qloop_15),tmp_qloop_17); + const __m256d tmp_qloop_19 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_8); + const __m256d tmp_qloop_20 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_6)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_8)),tmp_qloop_17),tmp_qloop_20); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_13)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_6)),_mm256_mul_pd(rho_dof_5,tmp_qloop_8)),tmp_qloop_15),tmp_qloop_16),tmp_qloop_20); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP,jac_affine_inv_0_0_BLUE_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP,jac_affine_inv_1_0_BLUE_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP,jac_affine_inv_2_0_BLUE_UP))); + const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_1); + const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_3); + const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_5); + const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_13); + const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_29 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_6); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_11,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8); + const __m256d tmp_qloop_32 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_12),_mm256_mul_pd(rho_dof_1,tmp_qloop_24)),_mm256_mul_pd(rho_dof_2,tmp_qloop_25)),_mm256_mul_pd(rho_dof_3,tmp_qloop_26)),_mm256_mul_pd(rho_dof_4,tmp_qloop_7)),_mm256_mul_pd(rho_dof_5,tmp_qloop_10)),_mm256_mul_pd(rho_dof_6,tmp_qloop_9)),_mm256_mul_pd(rho_dof_7,tmp_qloop_27)),_mm256_mul_pd(rho_dof_8,tmp_qloop_29)),_mm256_mul_pd(rho_dof_9,tmp_qloop_31))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE_UP,abs_det_jac_affine_BLUE_UP,abs_det_jac_affine_BLUE_UP,abs_det_jac_affine_BLUE_UP)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_33); + const __m256d tmp_qloop_35 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP,jac_affine_inv_0_1_BLUE_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP,jac_affine_inv_1_1_BLUE_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP,jac_affine_inv_2_1_BLUE_UP))); + const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_35); + const __m256d tmp_qloop_37 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP,jac_affine_inv_0_2_BLUE_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP,jac_affine_inv_1_2_BLUE_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP,jac_affine_inv_2_2_BLUE_UP))); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_37); + const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_39); + const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_41); + const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_9); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_44); + const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_39); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_41); + const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_47,tmp_qloop_9); + const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_44); + const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_39); + const __m256d tmp_qloop_51 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_41); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_9); + const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_44); + const __m256d tmp_qloop_54 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_32); + const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_8); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_55); + const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_54); + const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_4); + const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_58); + const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_58); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_34); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_34); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_34); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_34); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_7); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_34); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_9); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_34); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_34); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_34); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_36); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_36); + const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_36); + const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_36); + const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_7); + const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_36); + const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_9); + const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_36); + const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_36); + const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_36); + const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_38); + const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_38); + const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_38); + const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_38); + const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_7); + const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_38); + const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_9); + const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_38); + const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_38); + const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_38); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_40); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_40); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_40); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_40); + const __m256d q_tmp_1_4 = tmp_qloop_43; + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_42); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_45); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_40); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_40); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_40); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_46); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_46); + const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_46); + const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_46); + const __m256d q_tmp_1_14 = tmp_qloop_48; + const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_47); + const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_49); + const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_46); + const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_46); + const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_46); + const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_50); + const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_50); + const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50); + const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_50); + const __m256d q_tmp_1_24 = tmp_qloop_52; + const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_51); + const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_53); + const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_50); + const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_50); + const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_50); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_45); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_45); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_45); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_45); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_42); + const __m256d q_tmp_2_5 = tmp_qloop_43; + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_55); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_45); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_45); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_45); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_49); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_49); + const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_49); + const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_49); + const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_47); + const __m256d q_tmp_2_15 = tmp_qloop_48; + const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_56); + const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_49); + const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_49); + const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_49); + const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_53); + const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_53); + const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_53); + const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_53); + const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_51); + const __m256d q_tmp_2_25 = tmp_qloop_52; + const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_56); + const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_53); + const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_53); + const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_53); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_42); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_42); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_42); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_42); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_6); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_8); + const __m256d q_tmp_3_6 = tmp_qloop_43; + const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_42); + const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_42); + const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_42); + const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_47); + const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_47); + const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_47); + const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_47); + const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_6); + const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_8); + const __m256d q_tmp_3_16 = tmp_qloop_48; + const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_47); + const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_47); + const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_47); + const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_51); + const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_51); + const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_51); + const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_51); + const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_6,tmp_qloop_60); + const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_8); + const __m256d q_tmp_3_26 = tmp_qloop_52; + const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_51); + const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_51); + const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_51); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12); + q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13); + q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14); + q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15); + q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16); + q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17); + q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18); + q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19); + q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20); + q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21); + q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22); + q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23); + q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24); + q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25); + q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26); + q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27); + q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28); + q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12); + q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13); + q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14); + q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15); + q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16); + q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17); + q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18); + q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19); + q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20); + q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21); + q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22); + q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23); + q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24); + q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25); + q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26); + q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27); + q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28); + q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12); + q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13); + q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14); + q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15); + q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16); + q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17); + q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18); + q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19); + q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20); + q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21); + q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22); + q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23); + q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24); + q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25); + q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26); + q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27); + q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28); + q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6); + q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7); + q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8); + q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9); + q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10); + q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11); + q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12); + q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13); + q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14); + q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15); + q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16); + q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17); + q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18); + q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19); + q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20); + q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21); + q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22); + q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23); + q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24); + q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25); + q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26); + q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27); + q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28); + q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_6 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_7 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_16 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_17 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_26 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_27 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_0_12 = 0.0; + real_t q_acc_0_13 = 0.0; + real_t q_acc_0_14 = 0.0; + real_t q_acc_0_15 = 0.0; + real_t q_acc_0_16 = 0.0; + real_t q_acc_0_17 = 0.0; + real_t q_acc_0_18 = 0.0; + real_t q_acc_0_19 = 0.0; + real_t q_acc_0_20 = 0.0; + real_t q_acc_0_21 = 0.0; + real_t q_acc_0_22 = 0.0; + real_t q_acc_0_23 = 0.0; + real_t q_acc_0_24 = 0.0; + real_t q_acc_0_25 = 0.0; + real_t q_acc_0_26 = 0.0; + real_t q_acc_0_27 = 0.0; + real_t q_acc_0_28 = 0.0; + real_t q_acc_0_29 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_1_12 = 0.0; + real_t q_acc_1_13 = 0.0; + real_t q_acc_1_14 = 0.0; + real_t q_acc_1_15 = 0.0; + real_t q_acc_1_16 = 0.0; + real_t q_acc_1_17 = 0.0; + real_t q_acc_1_18 = 0.0; + real_t q_acc_1_19 = 0.0; + real_t q_acc_1_20 = 0.0; + real_t q_acc_1_21 = 0.0; + real_t q_acc_1_22 = 0.0; + real_t q_acc_1_23 = 0.0; + real_t q_acc_1_24 = 0.0; + real_t q_acc_1_25 = 0.0; + real_t q_acc_1_26 = 0.0; + real_t q_acc_1_27 = 0.0; + real_t q_acc_1_28 = 0.0; + real_t q_acc_1_29 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + real_t q_acc_2_12 = 0.0; + real_t q_acc_2_13 = 0.0; + real_t q_acc_2_14 = 0.0; + real_t q_acc_2_15 = 0.0; + real_t q_acc_2_16 = 0.0; + real_t q_acc_2_17 = 0.0; + real_t q_acc_2_18 = 0.0; + real_t q_acc_2_19 = 0.0; + real_t q_acc_2_20 = 0.0; + real_t q_acc_2_21 = 0.0; + real_t q_acc_2_22 = 0.0; + real_t q_acc_2_23 = 0.0; + real_t q_acc_2_24 = 0.0; + real_t q_acc_2_25 = 0.0; + real_t q_acc_2_26 = 0.0; + real_t q_acc_2_27 = 0.0; + real_t q_acc_2_28 = 0.0; + real_t q_acc_2_29 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_3_6 = 0.0; + real_t q_acc_3_7 = 0.0; + real_t q_acc_3_8 = 0.0; + real_t q_acc_3_9 = 0.0; + real_t q_acc_3_10 = 0.0; + real_t q_acc_3_11 = 0.0; + real_t q_acc_3_12 = 0.0; + real_t q_acc_3_13 = 0.0; + real_t q_acc_3_14 = 0.0; + real_t q_acc_3_15 = 0.0; + real_t q_acc_3_16 = 0.0; + real_t q_acc_3_17 = 0.0; + real_t q_acc_3_18 = 0.0; + real_t q_acc_3_19 = 0.0; + real_t q_acc_3_20 = 0.0; + real_t q_acc_3_21 = 0.0; + real_t q_acc_3_22 = 0.0; + real_t q_acc_3_23 = 0.0; + real_t q_acc_3_24 = 0.0; + real_t q_acc_3_25 = 0.0; + real_t q_acc_3_26 = 0.0; + real_t q_acc_3_27 = 0.0; + real_t q_acc_3_28 = 0.0; + real_t q_acc_3_29 = 0.0; + for (int64_t q = 0; q < 5; q += 1) + { + const real_t tmp_qloop_0 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_1 = tmp_qloop_0*2.0; + const real_t tmp_qloop_2 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_3 = tmp_qloop_2*2.0; + const real_t tmp_qloop_4 = (_data_q_p_2[q]*_data_q_p_2[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_7 = tmp_qloop_6*_data_q_p_2[q]; + const real_t tmp_qloop_8 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_9 = tmp_qloop_8*_data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_8*_data_q_p_2[q]; + const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_1 + tmp_qloop_11 + tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_7 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0; + const real_t tmp_qloop_13 = 4.0*_data_q_p_2[q]; + const real_t tmp_qloop_14 = tmp_qloop_13 + tmp_qloop_6; + const real_t tmp_qloop_15 = -rho_dof_8*tmp_qloop_6; + const real_t tmp_qloop_16 = rho_dof_0*(tmp_qloop_14 + tmp_qloop_8 - 3.0); + const real_t tmp_qloop_17 = -rho_dof_7*tmp_qloop_13 + tmp_qloop_16; + const real_t tmp_qloop_18 = rho_dof_1*(tmp_qloop_8 - 1.0) + rho_dof_5*tmp_qloop_13 + rho_dof_6*tmp_qloop_6 + rho_dof_9*(-tmp_qloop_14 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_15 + tmp_qloop_17; + const real_t tmp_qloop_19 = tmp_qloop_8 - 4.0; + const real_t tmp_qloop_20 = -rho_dof_9*tmp_qloop_8; + const real_t tmp_qloop_21 = rho_dof_2*(tmp_qloop_6 - 1.0) + rho_dof_4*tmp_qloop_13 + rho_dof_6*tmp_qloop_8 + rho_dof_8*(-tmp_qloop_13 - tmp_qloop_19 - 8.0*_data_q_p_1[q]) + tmp_qloop_17 + tmp_qloop_20; + const real_t tmp_qloop_22 = rho_dof_3*(tmp_qloop_13 - 1.0) + rho_dof_4*tmp_qloop_6 + rho_dof_5*tmp_qloop_8 + rho_dof_7*(-tmp_qloop_19 - tmp_qloop_6 - 8.0*_data_q_p_2[q]) + tmp_qloop_15 + tmp_qloop_16 + tmp_qloop_20; + const real_t tmp_qloop_23 = jac_affine_inv_0_0_BLUE_UP*tmp_qloop_18 + jac_affine_inv_1_0_BLUE_UP*tmp_qloop_21 + jac_affine_inv_2_0_BLUE_UP*tmp_qloop_22; + const real_t tmp_qloop_24 = tmp_qloop_1 - _data_q_p_0[q]; + const real_t tmp_qloop_25 = tmp_qloop_3 - _data_q_p_1[q]; + const real_t tmp_qloop_26 = tmp_qloop_5 - _data_q_p_2[q]; + const real_t tmp_qloop_27 = -tmp_qloop_10 + tmp_qloop_13 + tmp_qloop_4*-4.0 - tmp_qloop_7; + const real_t tmp_qloop_28 = tmp_qloop_2*4.0; + const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; + const real_t tmp_qloop_30 = tmp_qloop_0*4.0; + const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; + const real_t tmp_qloop_32 = abs_det_jac_affine_BLUE_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; + const real_t tmp_qloop_35 = jac_affine_inv_0_1_BLUE_UP*tmp_qloop_18 + jac_affine_inv_1_1_BLUE_UP*tmp_qloop_21 + jac_affine_inv_2_1_BLUE_UP*tmp_qloop_22; + const real_t tmp_qloop_36 = tmp_qloop_33*tmp_qloop_35; + const real_t tmp_qloop_37 = jac_affine_inv_0_2_BLUE_UP*tmp_qloop_18 + jac_affine_inv_1_2_BLUE_UP*tmp_qloop_21 + jac_affine_inv_2_2_BLUE_UP*tmp_qloop_22; + const real_t tmp_qloop_38 = tmp_qloop_33*tmp_qloop_37; + const real_t tmp_qloop_39 = tmp_qloop_32*_data_q_p_0[q]; + const real_t tmp_qloop_40 = tmp_qloop_23*tmp_qloop_39; + const real_t tmp_qloop_41 = tmp_qloop_32*_data_q_p_2[q]; + const real_t tmp_qloop_42 = tmp_qloop_23*tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_42*tmp_qloop_9; + const real_t tmp_qloop_44 = tmp_qloop_32*_data_q_p_1[q]; + const real_t tmp_qloop_45 = tmp_qloop_23*tmp_qloop_44; + const real_t tmp_qloop_46 = tmp_qloop_35*tmp_qloop_39; + const real_t tmp_qloop_47 = tmp_qloop_35*tmp_qloop_41; + const real_t tmp_qloop_48 = tmp_qloop_47*tmp_qloop_9; + const real_t tmp_qloop_49 = tmp_qloop_35*tmp_qloop_44; + const real_t tmp_qloop_50 = tmp_qloop_37*tmp_qloop_39; + const real_t tmp_qloop_51 = tmp_qloop_37*tmp_qloop_41; + const real_t tmp_qloop_52 = tmp_qloop_51*tmp_qloop_9; + const real_t tmp_qloop_53 = tmp_qloop_37*tmp_qloop_44; + const real_t tmp_qloop_54 = tmp_qloop_23*tmp_qloop_32; + const real_t tmp_qloop_55 = tmp_qloop_2*tmp_qloop_8; + const real_t tmp_qloop_56 = tmp_qloop_32*tmp_qloop_55; + const real_t tmp_qloop_57 = tmp_qloop_4*tmp_qloop_54; + const real_t tmp_qloop_58 = tmp_qloop_32*tmp_qloop_4; + const real_t tmp_qloop_59 = tmp_qloop_35*tmp_qloop_58; + const real_t tmp_qloop_60 = tmp_qloop_37*tmp_qloop_58; + const real_t q_tmp_0_0 = tmp_qloop_12*tmp_qloop_34; + const real_t q_tmp_0_1 = tmp_qloop_24*tmp_qloop_34; + const real_t q_tmp_0_2 = tmp_qloop_25*tmp_qloop_34; + const real_t q_tmp_0_3 = tmp_qloop_26*tmp_qloop_34; + const real_t q_tmp_0_4 = tmp_qloop_34*tmp_qloop_7; + const real_t q_tmp_0_5 = tmp_qloop_10*tmp_qloop_34; + const real_t q_tmp_0_6 = tmp_qloop_34*tmp_qloop_9; + const real_t q_tmp_0_7 = tmp_qloop_27*tmp_qloop_34; + const real_t q_tmp_0_8 = tmp_qloop_29*tmp_qloop_34; + const real_t q_tmp_0_9 = tmp_qloop_31*tmp_qloop_34; + const real_t q_tmp_0_10 = tmp_qloop_12*tmp_qloop_36; + const real_t q_tmp_0_11 = tmp_qloop_24*tmp_qloop_36; + const real_t q_tmp_0_12 = tmp_qloop_25*tmp_qloop_36; + const real_t q_tmp_0_13 = tmp_qloop_26*tmp_qloop_36; + const real_t q_tmp_0_14 = tmp_qloop_36*tmp_qloop_7; + const real_t q_tmp_0_15 = tmp_qloop_10*tmp_qloop_36; + const real_t q_tmp_0_16 = tmp_qloop_36*tmp_qloop_9; + const real_t q_tmp_0_17 = tmp_qloop_27*tmp_qloop_36; + const real_t q_tmp_0_18 = tmp_qloop_29*tmp_qloop_36; + const real_t q_tmp_0_19 = tmp_qloop_31*tmp_qloop_36; + const real_t q_tmp_0_20 = tmp_qloop_12*tmp_qloop_38; + const real_t q_tmp_0_21 = tmp_qloop_24*tmp_qloop_38; + const real_t q_tmp_0_22 = tmp_qloop_25*tmp_qloop_38; + const real_t q_tmp_0_23 = tmp_qloop_26*tmp_qloop_38; + const real_t q_tmp_0_24 = tmp_qloop_38*tmp_qloop_7; + const real_t q_tmp_0_25 = tmp_qloop_10*tmp_qloop_38; + const real_t q_tmp_0_26 = tmp_qloop_38*tmp_qloop_9; + const real_t q_tmp_0_27 = tmp_qloop_27*tmp_qloop_38; + const real_t q_tmp_0_28 = tmp_qloop_29*tmp_qloop_38; + const real_t q_tmp_0_29 = tmp_qloop_31*tmp_qloop_38; + const real_t q_tmp_1_0 = tmp_qloop_12*tmp_qloop_40; + const real_t q_tmp_1_1 = tmp_qloop_24*tmp_qloop_40; + const real_t q_tmp_1_2 = tmp_qloop_25*tmp_qloop_40; + const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_40; + const real_t q_tmp_1_4 = tmp_qloop_43; + const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_42; + const real_t q_tmp_1_6 = tmp_qloop_30*tmp_qloop_45; + const real_t q_tmp_1_7 = tmp_qloop_27*tmp_qloop_40; + const real_t q_tmp_1_8 = tmp_qloop_29*tmp_qloop_40; + const real_t q_tmp_1_9 = tmp_qloop_31*tmp_qloop_40; + const real_t q_tmp_1_10 = tmp_qloop_12*tmp_qloop_46; + const real_t q_tmp_1_11 = tmp_qloop_24*tmp_qloop_46; + const real_t q_tmp_1_12 = tmp_qloop_25*tmp_qloop_46; + const real_t q_tmp_1_13 = tmp_qloop_26*tmp_qloop_46; + const real_t q_tmp_1_14 = tmp_qloop_48; + const real_t q_tmp_1_15 = tmp_qloop_30*tmp_qloop_47; + const real_t q_tmp_1_16 = tmp_qloop_30*tmp_qloop_49; + const real_t q_tmp_1_17 = tmp_qloop_27*tmp_qloop_46; + const real_t q_tmp_1_18 = tmp_qloop_29*tmp_qloop_46; + const real_t q_tmp_1_19 = tmp_qloop_31*tmp_qloop_46; + const real_t q_tmp_1_20 = tmp_qloop_12*tmp_qloop_50; + const real_t q_tmp_1_21 = tmp_qloop_24*tmp_qloop_50; + const real_t q_tmp_1_22 = tmp_qloop_25*tmp_qloop_50; + const real_t q_tmp_1_23 = tmp_qloop_26*tmp_qloop_50; + const real_t q_tmp_1_24 = tmp_qloop_52; + const real_t q_tmp_1_25 = tmp_qloop_30*tmp_qloop_51; + const real_t q_tmp_1_26 = tmp_qloop_30*tmp_qloop_53; + const real_t q_tmp_1_27 = tmp_qloop_27*tmp_qloop_50; + const real_t q_tmp_1_28 = tmp_qloop_29*tmp_qloop_50; + const real_t q_tmp_1_29 = tmp_qloop_31*tmp_qloop_50; + const real_t q_tmp_2_0 = tmp_qloop_12*tmp_qloop_45; + const real_t q_tmp_2_1 = tmp_qloop_24*tmp_qloop_45; + const real_t q_tmp_2_2 = tmp_qloop_25*tmp_qloop_45; + const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_45; + const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_42; + const real_t q_tmp_2_5 = tmp_qloop_43; + const real_t q_tmp_2_6 = tmp_qloop_54*tmp_qloop_55; + const real_t q_tmp_2_7 = tmp_qloop_27*tmp_qloop_45; + const real_t q_tmp_2_8 = tmp_qloop_29*tmp_qloop_45; + const real_t q_tmp_2_9 = tmp_qloop_31*tmp_qloop_45; + const real_t q_tmp_2_10 = tmp_qloop_12*tmp_qloop_49; + const real_t q_tmp_2_11 = tmp_qloop_24*tmp_qloop_49; + const real_t q_tmp_2_12 = tmp_qloop_25*tmp_qloop_49; + const real_t q_tmp_2_13 = tmp_qloop_26*tmp_qloop_49; + const real_t q_tmp_2_14 = tmp_qloop_28*tmp_qloop_47; + const real_t q_tmp_2_15 = tmp_qloop_48; + const real_t q_tmp_2_16 = tmp_qloop_35*tmp_qloop_56; + const real_t q_tmp_2_17 = tmp_qloop_27*tmp_qloop_49; + const real_t q_tmp_2_18 = tmp_qloop_29*tmp_qloop_49; + const real_t q_tmp_2_19 = tmp_qloop_31*tmp_qloop_49; + const real_t q_tmp_2_20 = tmp_qloop_12*tmp_qloop_53; + const real_t q_tmp_2_21 = tmp_qloop_24*tmp_qloop_53; + const real_t q_tmp_2_22 = tmp_qloop_25*tmp_qloop_53; + const real_t q_tmp_2_23 = tmp_qloop_26*tmp_qloop_53; + const real_t q_tmp_2_24 = tmp_qloop_28*tmp_qloop_51; + const real_t q_tmp_2_25 = tmp_qloop_52; + const real_t q_tmp_2_26 = tmp_qloop_37*tmp_qloop_56; + const real_t q_tmp_2_27 = tmp_qloop_27*tmp_qloop_53; + const real_t q_tmp_2_28 = tmp_qloop_29*tmp_qloop_53; + const real_t q_tmp_2_29 = tmp_qloop_31*tmp_qloop_53; + const real_t q_tmp_3_0 = tmp_qloop_12*tmp_qloop_42; + const real_t q_tmp_3_1 = tmp_qloop_24*tmp_qloop_42; + const real_t q_tmp_3_2 = tmp_qloop_25*tmp_qloop_42; + const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_42; + const real_t q_tmp_3_4 = tmp_qloop_57*tmp_qloop_6; + const real_t q_tmp_3_5 = tmp_qloop_57*tmp_qloop_8; + const real_t q_tmp_3_6 = tmp_qloop_43; + const real_t q_tmp_3_7 = tmp_qloop_27*tmp_qloop_42; + const real_t q_tmp_3_8 = tmp_qloop_29*tmp_qloop_42; + const real_t q_tmp_3_9 = tmp_qloop_31*tmp_qloop_42; + const real_t q_tmp_3_10 = tmp_qloop_12*tmp_qloop_47; + const real_t q_tmp_3_11 = tmp_qloop_24*tmp_qloop_47; + const real_t q_tmp_3_12 = tmp_qloop_25*tmp_qloop_47; + const real_t q_tmp_3_13 = tmp_qloop_26*tmp_qloop_47; + const real_t q_tmp_3_14 = tmp_qloop_59*tmp_qloop_6; + const real_t q_tmp_3_15 = tmp_qloop_59*tmp_qloop_8; + const real_t q_tmp_3_16 = tmp_qloop_48; + const real_t q_tmp_3_17 = tmp_qloop_27*tmp_qloop_47; + const real_t q_tmp_3_18 = tmp_qloop_29*tmp_qloop_47; + const real_t q_tmp_3_19 = tmp_qloop_31*tmp_qloop_47; + const real_t q_tmp_3_20 = tmp_qloop_12*tmp_qloop_51; + const real_t q_tmp_3_21 = tmp_qloop_24*tmp_qloop_51; + const real_t q_tmp_3_22 = tmp_qloop_25*tmp_qloop_51; + const real_t q_tmp_3_23 = tmp_qloop_26*tmp_qloop_51; + const real_t q_tmp_3_24 = tmp_qloop_6*tmp_qloop_60; + const real_t q_tmp_3_25 = tmp_qloop_60*tmp_qloop_8; + const real_t q_tmp_3_26 = tmp_qloop_52; + const real_t q_tmp_3_27 = tmp_qloop_27*tmp_qloop_51; + const real_t q_tmp_3_28 = tmp_qloop_29*tmp_qloop_51; + const real_t q_tmp_3_29 = tmp_qloop_31*tmp_qloop_51; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_0_12 = q_acc_0_12 + q_tmp_0_12; + q_acc_0_13 = q_acc_0_13 + q_tmp_0_13; + q_acc_0_14 = q_acc_0_14 + q_tmp_0_14; + q_acc_0_15 = q_acc_0_15 + q_tmp_0_15; + q_acc_0_16 = q_acc_0_16 + q_tmp_0_16; + q_acc_0_17 = q_acc_0_17 + q_tmp_0_17; + q_acc_0_18 = q_acc_0_18 + q_tmp_0_18; + q_acc_0_19 = q_acc_0_19 + q_tmp_0_19; + q_acc_0_20 = q_acc_0_20 + q_tmp_0_20; + q_acc_0_21 = q_acc_0_21 + q_tmp_0_21; + q_acc_0_22 = q_acc_0_22 + q_tmp_0_22; + q_acc_0_23 = q_acc_0_23 + q_tmp_0_23; + q_acc_0_24 = q_acc_0_24 + q_tmp_0_24; + q_acc_0_25 = q_acc_0_25 + q_tmp_0_25; + q_acc_0_26 = q_acc_0_26 + q_tmp_0_26; + q_acc_0_27 = q_acc_0_27 + q_tmp_0_27; + q_acc_0_28 = q_acc_0_28 + q_tmp_0_28; + q_acc_0_29 = q_acc_0_29 + q_tmp_0_29; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_1_12 = q_acc_1_12 + q_tmp_1_12; + q_acc_1_13 = q_acc_1_13 + q_tmp_1_13; + q_acc_1_14 = q_acc_1_14 + q_tmp_1_14; + q_acc_1_15 = q_acc_1_15 + q_tmp_1_15; + q_acc_1_16 = q_acc_1_16 + q_tmp_1_16; + q_acc_1_17 = q_acc_1_17 + q_tmp_1_17; + q_acc_1_18 = q_acc_1_18 + q_tmp_1_18; + q_acc_1_19 = q_acc_1_19 + q_tmp_1_19; + q_acc_1_20 = q_acc_1_20 + q_tmp_1_20; + q_acc_1_21 = q_acc_1_21 + q_tmp_1_21; + q_acc_1_22 = q_acc_1_22 + q_tmp_1_22; + q_acc_1_23 = q_acc_1_23 + q_tmp_1_23; + q_acc_1_24 = q_acc_1_24 + q_tmp_1_24; + q_acc_1_25 = q_acc_1_25 + q_tmp_1_25; + q_acc_1_26 = q_acc_1_26 + q_tmp_1_26; + q_acc_1_27 = q_acc_1_27 + q_tmp_1_27; + q_acc_1_28 = q_acc_1_28 + q_tmp_1_28; + q_acc_1_29 = q_acc_1_29 + q_tmp_1_29; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + q_acc_2_12 = q_acc_2_12 + q_tmp_2_12; + q_acc_2_13 = q_acc_2_13 + q_tmp_2_13; + q_acc_2_14 = q_acc_2_14 + q_tmp_2_14; + q_acc_2_15 = q_acc_2_15 + q_tmp_2_15; + q_acc_2_16 = q_acc_2_16 + q_tmp_2_16; + q_acc_2_17 = q_acc_2_17 + q_tmp_2_17; + q_acc_2_18 = q_acc_2_18 + q_tmp_2_18; + q_acc_2_19 = q_acc_2_19 + q_tmp_2_19; + q_acc_2_20 = q_acc_2_20 + q_tmp_2_20; + q_acc_2_21 = q_acc_2_21 + q_tmp_2_21; + q_acc_2_22 = q_acc_2_22 + q_tmp_2_22; + q_acc_2_23 = q_acc_2_23 + q_tmp_2_23; + q_acc_2_24 = q_acc_2_24 + q_tmp_2_24; + q_acc_2_25 = q_acc_2_25 + q_tmp_2_25; + q_acc_2_26 = q_acc_2_26 + q_tmp_2_26; + q_acc_2_27 = q_acc_2_27 + q_tmp_2_27; + q_acc_2_28 = q_acc_2_28 + q_tmp_2_28; + q_acc_2_29 = q_acc_2_29 + q_tmp_2_29; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_3_6 = q_acc_3_6 + q_tmp_3_6; + q_acc_3_7 = q_acc_3_7 + q_tmp_3_7; + q_acc_3_8 = q_acc_3_8 + q_tmp_3_8; + q_acc_3_9 = q_acc_3_9 + q_tmp_3_9; + q_acc_3_10 = q_acc_3_10 + q_tmp_3_10; + q_acc_3_11 = q_acc_3_11 + q_tmp_3_11; + q_acc_3_12 = q_acc_3_12 + q_tmp_3_12; + q_acc_3_13 = q_acc_3_13 + q_tmp_3_13; + q_acc_3_14 = q_acc_3_14 + q_tmp_3_14; + q_acc_3_15 = q_acc_3_15 + q_tmp_3_15; + q_acc_3_16 = q_acc_3_16 + q_tmp_3_16; + q_acc_3_17 = q_acc_3_17 + q_tmp_3_17; + q_acc_3_18 = q_acc_3_18 + q_tmp_3_18; + q_acc_3_19 = q_acc_3_19 + q_tmp_3_19; + q_acc_3_20 = q_acc_3_20 + q_tmp_3_20; + q_acc_3_21 = q_acc_3_21 + q_tmp_3_21; + q_acc_3_22 = q_acc_3_22 + q_tmp_3_22; + q_acc_3_23 = q_acc_3_23 + q_tmp_3_23; + q_acc_3_24 = q_acc_3_24 + q_tmp_3_24; + q_acc_3_25 = q_acc_3_25 + q_tmp_3_25; + q_acc_3_26 = q_acc_3_26 + q_tmp_3_26; + q_acc_3_27 = q_acc_3_27 + q_tmp_3_27; + q_acc_3_28 = q_acc_3_28 + q_tmp_3_28; + q_acc_3_29 = q_acc_3_29 + q_tmp_3_29; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + } + } + } + const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN; + const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN; + const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN; + const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN; + const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN; + const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN; + const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN; + const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN; + const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN; + const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN; + const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN; + const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN; + const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN; + const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN; + const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN; + const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN; + const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN; + const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN; + const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN; + const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN; + const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN; + const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN; + const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN; + const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN; + const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN; + const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN; + const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_15_BLUE_DOWN = jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN; + const real_t tmp_coords_jac_16_BLUE_DOWN = 1.0 / (tmp_coords_jac_15_BLUE_DOWN); + const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN); + const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN); + const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN); + const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); + const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); + const real_t abs_det_jac_affine_BLUE_DOWN = abs(tmp_coords_jac_15_BLUE_DOWN); + { + /* CellType.BLUE_DOWN */ + for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) + for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 5; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_6 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_7 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_8 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_11 = _mm256_add_pd(tmp_qloop_10,tmp_qloop_9); + const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_11),tmp_qloop_3),tmp_qloop_5),tmp_qloop_7); + const __m256d tmp_qloop_13 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_14 = _mm256_add_pd(tmp_qloop_13,tmp_qloop_6); + const __m256d tmp_qloop_15 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_6),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_16 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_14),tmp_qloop_8)); + const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_13),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_8)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_6)),tmp_qloop_15),tmp_qloop_17); + const __m256d tmp_qloop_19 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_8); + const __m256d tmp_qloop_20 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_6)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_8)),tmp_qloop_17),tmp_qloop_20); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_13)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_6)),_mm256_mul_pd(rho_dof_5,tmp_qloop_8)),tmp_qloop_15),tmp_qloop_16),tmp_qloop_20); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN,jac_affine_inv_0_0_BLUE_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN,jac_affine_inv_1_0_BLUE_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN,jac_affine_inv_2_0_BLUE_DOWN))); + const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_1); + const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_3); + const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_5); + const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_13); + const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_29 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_6); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_11,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8); + const __m256d tmp_qloop_32 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_12),_mm256_mul_pd(rho_dof_1,tmp_qloop_24)),_mm256_mul_pd(rho_dof_2,tmp_qloop_25)),_mm256_mul_pd(rho_dof_3,tmp_qloop_26)),_mm256_mul_pd(rho_dof_4,tmp_qloop_7)),_mm256_mul_pd(rho_dof_5,tmp_qloop_10)),_mm256_mul_pd(rho_dof_6,tmp_qloop_9)),_mm256_mul_pd(rho_dof_7,tmp_qloop_27)),_mm256_mul_pd(rho_dof_8,tmp_qloop_29)),_mm256_mul_pd(rho_dof_9,tmp_qloop_31))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE_DOWN,abs_det_jac_affine_BLUE_DOWN,abs_det_jac_affine_BLUE_DOWN,abs_det_jac_affine_BLUE_DOWN)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_33); + const __m256d tmp_qloop_35 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN,jac_affine_inv_0_1_BLUE_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN,jac_affine_inv_1_1_BLUE_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN,jac_affine_inv_2_1_BLUE_DOWN))); + const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_35); + const __m256d tmp_qloop_37 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN,jac_affine_inv_0_2_BLUE_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN,jac_affine_inv_1_2_BLUE_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN,jac_affine_inv_2_2_BLUE_DOWN))); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_37); + const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_39); + const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_41); + const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_9); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_44); + const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_39); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_41); + const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_47,tmp_qloop_9); + const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_44); + const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_39); + const __m256d tmp_qloop_51 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_41); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_9); + const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_44); + const __m256d tmp_qloop_54 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_32); + const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_8); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_55); + const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_54); + const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_4); + const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_58); + const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_58); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_34); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_34); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_34); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_34); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_7); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_34); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_9); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_34); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_34); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_34); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_36); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_36); + const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_36); + const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_36); + const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_7); + const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_36); + const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_9); + const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_36); + const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_36); + const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_36); + const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_38); + const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_38); + const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_38); + const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_38); + const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_7); + const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_38); + const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_9); + const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_38); + const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_38); + const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_38); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_40); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_40); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_40); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_40); + const __m256d q_tmp_1_4 = tmp_qloop_43; + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_42); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_45); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_40); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_40); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_40); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_46); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_46); + const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_46); + const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_46); + const __m256d q_tmp_1_14 = tmp_qloop_48; + const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_47); + const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_49); + const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_46); + const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_46); + const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_46); + const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_50); + const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_50); + const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50); + const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_50); + const __m256d q_tmp_1_24 = tmp_qloop_52; + const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_51); + const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_53); + const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_50); + const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_50); + const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_50); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_45); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_45); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_45); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_45); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_42); + const __m256d q_tmp_2_5 = tmp_qloop_43; + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_55); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_45); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_45); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_45); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_49); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_49); + const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_49); + const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_49); + const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_47); + const __m256d q_tmp_2_15 = tmp_qloop_48; + const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_56); + const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_49); + const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_49); + const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_49); + const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_53); + const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_53); + const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_53); + const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_53); + const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_51); + const __m256d q_tmp_2_25 = tmp_qloop_52; + const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_56); + const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_53); + const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_53); + const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_53); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_42); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_42); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_42); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_42); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_6); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_8); + const __m256d q_tmp_3_6 = tmp_qloop_43; + const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_42); + const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_42); + const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_42); + const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_47); + const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_47); + const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_47); + const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_47); + const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_6); + const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_8); + const __m256d q_tmp_3_16 = tmp_qloop_48; + const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_47); + const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_47); + const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_47); + const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_51); + const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_51); + const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_51); + const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_51); + const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_6,tmp_qloop_60); + const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_8); + const __m256d q_tmp_3_26 = tmp_qloop_52; + const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_51); + const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_51); + const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_51); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12); + q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13); + q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14); + q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15); + q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16); + q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17); + q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18); + q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19); + q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20); + q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21); + q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22); + q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23); + q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24); + q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25); + q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26); + q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27); + q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28); + q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12); + q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13); + q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14); + q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15); + q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16); + q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17); + q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18); + q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19); + q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20); + q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21); + q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22); + q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23); + q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24); + q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25); + q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26); + q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27); + q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28); + q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12); + q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13); + q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14); + q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15); + q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16); + q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17); + q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18); + q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19); + q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20); + q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21); + q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22); + q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23); + q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24); + q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25); + q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26); + q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27); + q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28); + q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6); + q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7); + q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8); + q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9); + q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10); + q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11); + q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12); + q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13); + q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14); + q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15); + q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16); + q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17); + q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18); + q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19); + q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20); + q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21); + q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22); + q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23); + q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24); + q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25); + q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26); + q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27); + q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28); + q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_7 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_17 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_27 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_0_12 = 0.0; + real_t q_acc_0_13 = 0.0; + real_t q_acc_0_14 = 0.0; + real_t q_acc_0_15 = 0.0; + real_t q_acc_0_16 = 0.0; + real_t q_acc_0_17 = 0.0; + real_t q_acc_0_18 = 0.0; + real_t q_acc_0_19 = 0.0; + real_t q_acc_0_20 = 0.0; + real_t q_acc_0_21 = 0.0; + real_t q_acc_0_22 = 0.0; + real_t q_acc_0_23 = 0.0; + real_t q_acc_0_24 = 0.0; + real_t q_acc_0_25 = 0.0; + real_t q_acc_0_26 = 0.0; + real_t q_acc_0_27 = 0.0; + real_t q_acc_0_28 = 0.0; + real_t q_acc_0_29 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_1_12 = 0.0; + real_t q_acc_1_13 = 0.0; + real_t q_acc_1_14 = 0.0; + real_t q_acc_1_15 = 0.0; + real_t q_acc_1_16 = 0.0; + real_t q_acc_1_17 = 0.0; + real_t q_acc_1_18 = 0.0; + real_t q_acc_1_19 = 0.0; + real_t q_acc_1_20 = 0.0; + real_t q_acc_1_21 = 0.0; + real_t q_acc_1_22 = 0.0; + real_t q_acc_1_23 = 0.0; + real_t q_acc_1_24 = 0.0; + real_t q_acc_1_25 = 0.0; + real_t q_acc_1_26 = 0.0; + real_t q_acc_1_27 = 0.0; + real_t q_acc_1_28 = 0.0; + real_t q_acc_1_29 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + real_t q_acc_2_12 = 0.0; + real_t q_acc_2_13 = 0.0; + real_t q_acc_2_14 = 0.0; + real_t q_acc_2_15 = 0.0; + real_t q_acc_2_16 = 0.0; + real_t q_acc_2_17 = 0.0; + real_t q_acc_2_18 = 0.0; + real_t q_acc_2_19 = 0.0; + real_t q_acc_2_20 = 0.0; + real_t q_acc_2_21 = 0.0; + real_t q_acc_2_22 = 0.0; + real_t q_acc_2_23 = 0.0; + real_t q_acc_2_24 = 0.0; + real_t q_acc_2_25 = 0.0; + real_t q_acc_2_26 = 0.0; + real_t q_acc_2_27 = 0.0; + real_t q_acc_2_28 = 0.0; + real_t q_acc_2_29 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_3_6 = 0.0; + real_t q_acc_3_7 = 0.0; + real_t q_acc_3_8 = 0.0; + real_t q_acc_3_9 = 0.0; + real_t q_acc_3_10 = 0.0; + real_t q_acc_3_11 = 0.0; + real_t q_acc_3_12 = 0.0; + real_t q_acc_3_13 = 0.0; + real_t q_acc_3_14 = 0.0; + real_t q_acc_3_15 = 0.0; + real_t q_acc_3_16 = 0.0; + real_t q_acc_3_17 = 0.0; + real_t q_acc_3_18 = 0.0; + real_t q_acc_3_19 = 0.0; + real_t q_acc_3_20 = 0.0; + real_t q_acc_3_21 = 0.0; + real_t q_acc_3_22 = 0.0; + real_t q_acc_3_23 = 0.0; + real_t q_acc_3_24 = 0.0; + real_t q_acc_3_25 = 0.0; + real_t q_acc_3_26 = 0.0; + real_t q_acc_3_27 = 0.0; + real_t q_acc_3_28 = 0.0; + real_t q_acc_3_29 = 0.0; + for (int64_t q = 0; q < 5; q += 1) + { + const real_t tmp_qloop_0 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_1 = tmp_qloop_0*2.0; + const real_t tmp_qloop_2 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_3 = tmp_qloop_2*2.0; + const real_t tmp_qloop_4 = (_data_q_p_2[q]*_data_q_p_2[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_7 = tmp_qloop_6*_data_q_p_2[q]; + const real_t tmp_qloop_8 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_9 = tmp_qloop_8*_data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_8*_data_q_p_2[q]; + const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_1 + tmp_qloop_11 + tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_7 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0; + const real_t tmp_qloop_13 = 4.0*_data_q_p_2[q]; + const real_t tmp_qloop_14 = tmp_qloop_13 + tmp_qloop_6; + const real_t tmp_qloop_15 = -rho_dof_8*tmp_qloop_6; + const real_t tmp_qloop_16 = rho_dof_0*(tmp_qloop_14 + tmp_qloop_8 - 3.0); + const real_t tmp_qloop_17 = -rho_dof_7*tmp_qloop_13 + tmp_qloop_16; + const real_t tmp_qloop_18 = rho_dof_1*(tmp_qloop_8 - 1.0) + rho_dof_5*tmp_qloop_13 + rho_dof_6*tmp_qloop_6 + rho_dof_9*(-tmp_qloop_14 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_15 + tmp_qloop_17; + const real_t tmp_qloop_19 = tmp_qloop_8 - 4.0; + const real_t tmp_qloop_20 = -rho_dof_9*tmp_qloop_8; + const real_t tmp_qloop_21 = rho_dof_2*(tmp_qloop_6 - 1.0) + rho_dof_4*tmp_qloop_13 + rho_dof_6*tmp_qloop_8 + rho_dof_8*(-tmp_qloop_13 - tmp_qloop_19 - 8.0*_data_q_p_1[q]) + tmp_qloop_17 + tmp_qloop_20; + const real_t tmp_qloop_22 = rho_dof_3*(tmp_qloop_13 - 1.0) + rho_dof_4*tmp_qloop_6 + rho_dof_5*tmp_qloop_8 + rho_dof_7*(-tmp_qloop_19 - tmp_qloop_6 - 8.0*_data_q_p_2[q]) + tmp_qloop_15 + tmp_qloop_16 + tmp_qloop_20; + const real_t tmp_qloop_23 = jac_affine_inv_0_0_BLUE_DOWN*tmp_qloop_18 + jac_affine_inv_1_0_BLUE_DOWN*tmp_qloop_21 + jac_affine_inv_2_0_BLUE_DOWN*tmp_qloop_22; + const real_t tmp_qloop_24 = tmp_qloop_1 - _data_q_p_0[q]; + const real_t tmp_qloop_25 = tmp_qloop_3 - _data_q_p_1[q]; + const real_t tmp_qloop_26 = tmp_qloop_5 - _data_q_p_2[q]; + const real_t tmp_qloop_27 = -tmp_qloop_10 + tmp_qloop_13 + tmp_qloop_4*-4.0 - tmp_qloop_7; + const real_t tmp_qloop_28 = tmp_qloop_2*4.0; + const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; + const real_t tmp_qloop_30 = tmp_qloop_0*4.0; + const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; + const real_t tmp_qloop_32 = abs_det_jac_affine_BLUE_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; + const real_t tmp_qloop_35 = jac_affine_inv_0_1_BLUE_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_BLUE_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_BLUE_DOWN*tmp_qloop_22; + const real_t tmp_qloop_36 = tmp_qloop_33*tmp_qloop_35; + const real_t tmp_qloop_37 = jac_affine_inv_0_2_BLUE_DOWN*tmp_qloop_18 + jac_affine_inv_1_2_BLUE_DOWN*tmp_qloop_21 + jac_affine_inv_2_2_BLUE_DOWN*tmp_qloop_22; + const real_t tmp_qloop_38 = tmp_qloop_33*tmp_qloop_37; + const real_t tmp_qloop_39 = tmp_qloop_32*_data_q_p_0[q]; + const real_t tmp_qloop_40 = tmp_qloop_23*tmp_qloop_39; + const real_t tmp_qloop_41 = tmp_qloop_32*_data_q_p_2[q]; + const real_t tmp_qloop_42 = tmp_qloop_23*tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_42*tmp_qloop_9; + const real_t tmp_qloop_44 = tmp_qloop_32*_data_q_p_1[q]; + const real_t tmp_qloop_45 = tmp_qloop_23*tmp_qloop_44; + const real_t tmp_qloop_46 = tmp_qloop_35*tmp_qloop_39; + const real_t tmp_qloop_47 = tmp_qloop_35*tmp_qloop_41; + const real_t tmp_qloop_48 = tmp_qloop_47*tmp_qloop_9; + const real_t tmp_qloop_49 = tmp_qloop_35*tmp_qloop_44; + const real_t tmp_qloop_50 = tmp_qloop_37*tmp_qloop_39; + const real_t tmp_qloop_51 = tmp_qloop_37*tmp_qloop_41; + const real_t tmp_qloop_52 = tmp_qloop_51*tmp_qloop_9; + const real_t tmp_qloop_53 = tmp_qloop_37*tmp_qloop_44; + const real_t tmp_qloop_54 = tmp_qloop_23*tmp_qloop_32; + const real_t tmp_qloop_55 = tmp_qloop_2*tmp_qloop_8; + const real_t tmp_qloop_56 = tmp_qloop_32*tmp_qloop_55; + const real_t tmp_qloop_57 = tmp_qloop_4*tmp_qloop_54; + const real_t tmp_qloop_58 = tmp_qloop_32*tmp_qloop_4; + const real_t tmp_qloop_59 = tmp_qloop_35*tmp_qloop_58; + const real_t tmp_qloop_60 = tmp_qloop_37*tmp_qloop_58; + const real_t q_tmp_0_0 = tmp_qloop_12*tmp_qloop_34; + const real_t q_tmp_0_1 = tmp_qloop_24*tmp_qloop_34; + const real_t q_tmp_0_2 = tmp_qloop_25*tmp_qloop_34; + const real_t q_tmp_0_3 = tmp_qloop_26*tmp_qloop_34; + const real_t q_tmp_0_4 = tmp_qloop_34*tmp_qloop_7; + const real_t q_tmp_0_5 = tmp_qloop_10*tmp_qloop_34; + const real_t q_tmp_0_6 = tmp_qloop_34*tmp_qloop_9; + const real_t q_tmp_0_7 = tmp_qloop_27*tmp_qloop_34; + const real_t q_tmp_0_8 = tmp_qloop_29*tmp_qloop_34; + const real_t q_tmp_0_9 = tmp_qloop_31*tmp_qloop_34; + const real_t q_tmp_0_10 = tmp_qloop_12*tmp_qloop_36; + const real_t q_tmp_0_11 = tmp_qloop_24*tmp_qloop_36; + const real_t q_tmp_0_12 = tmp_qloop_25*tmp_qloop_36; + const real_t q_tmp_0_13 = tmp_qloop_26*tmp_qloop_36; + const real_t q_tmp_0_14 = tmp_qloop_36*tmp_qloop_7; + const real_t q_tmp_0_15 = tmp_qloop_10*tmp_qloop_36; + const real_t q_tmp_0_16 = tmp_qloop_36*tmp_qloop_9; + const real_t q_tmp_0_17 = tmp_qloop_27*tmp_qloop_36; + const real_t q_tmp_0_18 = tmp_qloop_29*tmp_qloop_36; + const real_t q_tmp_0_19 = tmp_qloop_31*tmp_qloop_36; + const real_t q_tmp_0_20 = tmp_qloop_12*tmp_qloop_38; + const real_t q_tmp_0_21 = tmp_qloop_24*tmp_qloop_38; + const real_t q_tmp_0_22 = tmp_qloop_25*tmp_qloop_38; + const real_t q_tmp_0_23 = tmp_qloop_26*tmp_qloop_38; + const real_t q_tmp_0_24 = tmp_qloop_38*tmp_qloop_7; + const real_t q_tmp_0_25 = tmp_qloop_10*tmp_qloop_38; + const real_t q_tmp_0_26 = tmp_qloop_38*tmp_qloop_9; + const real_t q_tmp_0_27 = tmp_qloop_27*tmp_qloop_38; + const real_t q_tmp_0_28 = tmp_qloop_29*tmp_qloop_38; + const real_t q_tmp_0_29 = tmp_qloop_31*tmp_qloop_38; + const real_t q_tmp_1_0 = tmp_qloop_12*tmp_qloop_40; + const real_t q_tmp_1_1 = tmp_qloop_24*tmp_qloop_40; + const real_t q_tmp_1_2 = tmp_qloop_25*tmp_qloop_40; + const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_40; + const real_t q_tmp_1_4 = tmp_qloop_43; + const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_42; + const real_t q_tmp_1_6 = tmp_qloop_30*tmp_qloop_45; + const real_t q_tmp_1_7 = tmp_qloop_27*tmp_qloop_40; + const real_t q_tmp_1_8 = tmp_qloop_29*tmp_qloop_40; + const real_t q_tmp_1_9 = tmp_qloop_31*tmp_qloop_40; + const real_t q_tmp_1_10 = tmp_qloop_12*tmp_qloop_46; + const real_t q_tmp_1_11 = tmp_qloop_24*tmp_qloop_46; + const real_t q_tmp_1_12 = tmp_qloop_25*tmp_qloop_46; + const real_t q_tmp_1_13 = tmp_qloop_26*tmp_qloop_46; + const real_t q_tmp_1_14 = tmp_qloop_48; + const real_t q_tmp_1_15 = tmp_qloop_30*tmp_qloop_47; + const real_t q_tmp_1_16 = tmp_qloop_30*tmp_qloop_49; + const real_t q_tmp_1_17 = tmp_qloop_27*tmp_qloop_46; + const real_t q_tmp_1_18 = tmp_qloop_29*tmp_qloop_46; + const real_t q_tmp_1_19 = tmp_qloop_31*tmp_qloop_46; + const real_t q_tmp_1_20 = tmp_qloop_12*tmp_qloop_50; + const real_t q_tmp_1_21 = tmp_qloop_24*tmp_qloop_50; + const real_t q_tmp_1_22 = tmp_qloop_25*tmp_qloop_50; + const real_t q_tmp_1_23 = tmp_qloop_26*tmp_qloop_50; + const real_t q_tmp_1_24 = tmp_qloop_52; + const real_t q_tmp_1_25 = tmp_qloop_30*tmp_qloop_51; + const real_t q_tmp_1_26 = tmp_qloop_30*tmp_qloop_53; + const real_t q_tmp_1_27 = tmp_qloop_27*tmp_qloop_50; + const real_t q_tmp_1_28 = tmp_qloop_29*tmp_qloop_50; + const real_t q_tmp_1_29 = tmp_qloop_31*tmp_qloop_50; + const real_t q_tmp_2_0 = tmp_qloop_12*tmp_qloop_45; + const real_t q_tmp_2_1 = tmp_qloop_24*tmp_qloop_45; + const real_t q_tmp_2_2 = tmp_qloop_25*tmp_qloop_45; + const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_45; + const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_42; + const real_t q_tmp_2_5 = tmp_qloop_43; + const real_t q_tmp_2_6 = tmp_qloop_54*tmp_qloop_55; + const real_t q_tmp_2_7 = tmp_qloop_27*tmp_qloop_45; + const real_t q_tmp_2_8 = tmp_qloop_29*tmp_qloop_45; + const real_t q_tmp_2_9 = tmp_qloop_31*tmp_qloop_45; + const real_t q_tmp_2_10 = tmp_qloop_12*tmp_qloop_49; + const real_t q_tmp_2_11 = tmp_qloop_24*tmp_qloop_49; + const real_t q_tmp_2_12 = tmp_qloop_25*tmp_qloop_49; + const real_t q_tmp_2_13 = tmp_qloop_26*tmp_qloop_49; + const real_t q_tmp_2_14 = tmp_qloop_28*tmp_qloop_47; + const real_t q_tmp_2_15 = tmp_qloop_48; + const real_t q_tmp_2_16 = tmp_qloop_35*tmp_qloop_56; + const real_t q_tmp_2_17 = tmp_qloop_27*tmp_qloop_49; + const real_t q_tmp_2_18 = tmp_qloop_29*tmp_qloop_49; + const real_t q_tmp_2_19 = tmp_qloop_31*tmp_qloop_49; + const real_t q_tmp_2_20 = tmp_qloop_12*tmp_qloop_53; + const real_t q_tmp_2_21 = tmp_qloop_24*tmp_qloop_53; + const real_t q_tmp_2_22 = tmp_qloop_25*tmp_qloop_53; + const real_t q_tmp_2_23 = tmp_qloop_26*tmp_qloop_53; + const real_t q_tmp_2_24 = tmp_qloop_28*tmp_qloop_51; + const real_t q_tmp_2_25 = tmp_qloop_52; + const real_t q_tmp_2_26 = tmp_qloop_37*tmp_qloop_56; + const real_t q_tmp_2_27 = tmp_qloop_27*tmp_qloop_53; + const real_t q_tmp_2_28 = tmp_qloop_29*tmp_qloop_53; + const real_t q_tmp_2_29 = tmp_qloop_31*tmp_qloop_53; + const real_t q_tmp_3_0 = tmp_qloop_12*tmp_qloop_42; + const real_t q_tmp_3_1 = tmp_qloop_24*tmp_qloop_42; + const real_t q_tmp_3_2 = tmp_qloop_25*tmp_qloop_42; + const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_42; + const real_t q_tmp_3_4 = tmp_qloop_57*tmp_qloop_6; + const real_t q_tmp_3_5 = tmp_qloop_57*tmp_qloop_8; + const real_t q_tmp_3_6 = tmp_qloop_43; + const real_t q_tmp_3_7 = tmp_qloop_27*tmp_qloop_42; + const real_t q_tmp_3_8 = tmp_qloop_29*tmp_qloop_42; + const real_t q_tmp_3_9 = tmp_qloop_31*tmp_qloop_42; + const real_t q_tmp_3_10 = tmp_qloop_12*tmp_qloop_47; + const real_t q_tmp_3_11 = tmp_qloop_24*tmp_qloop_47; + const real_t q_tmp_3_12 = tmp_qloop_25*tmp_qloop_47; + const real_t q_tmp_3_13 = tmp_qloop_26*tmp_qloop_47; + const real_t q_tmp_3_14 = tmp_qloop_59*tmp_qloop_6; + const real_t q_tmp_3_15 = tmp_qloop_59*tmp_qloop_8; + const real_t q_tmp_3_16 = tmp_qloop_48; + const real_t q_tmp_3_17 = tmp_qloop_27*tmp_qloop_47; + const real_t q_tmp_3_18 = tmp_qloop_29*tmp_qloop_47; + const real_t q_tmp_3_19 = tmp_qloop_31*tmp_qloop_47; + const real_t q_tmp_3_20 = tmp_qloop_12*tmp_qloop_51; + const real_t q_tmp_3_21 = tmp_qloop_24*tmp_qloop_51; + const real_t q_tmp_3_22 = tmp_qloop_25*tmp_qloop_51; + const real_t q_tmp_3_23 = tmp_qloop_26*tmp_qloop_51; + const real_t q_tmp_3_24 = tmp_qloop_6*tmp_qloop_60; + const real_t q_tmp_3_25 = tmp_qloop_60*tmp_qloop_8; + const real_t q_tmp_3_26 = tmp_qloop_52; + const real_t q_tmp_3_27 = tmp_qloop_27*tmp_qloop_51; + const real_t q_tmp_3_28 = tmp_qloop_29*tmp_qloop_51; + const real_t q_tmp_3_29 = tmp_qloop_31*tmp_qloop_51; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_0_12 = q_acc_0_12 + q_tmp_0_12; + q_acc_0_13 = q_acc_0_13 + q_tmp_0_13; + q_acc_0_14 = q_acc_0_14 + q_tmp_0_14; + q_acc_0_15 = q_acc_0_15 + q_tmp_0_15; + q_acc_0_16 = q_acc_0_16 + q_tmp_0_16; + q_acc_0_17 = q_acc_0_17 + q_tmp_0_17; + q_acc_0_18 = q_acc_0_18 + q_tmp_0_18; + q_acc_0_19 = q_acc_0_19 + q_tmp_0_19; + q_acc_0_20 = q_acc_0_20 + q_tmp_0_20; + q_acc_0_21 = q_acc_0_21 + q_tmp_0_21; + q_acc_0_22 = q_acc_0_22 + q_tmp_0_22; + q_acc_0_23 = q_acc_0_23 + q_tmp_0_23; + q_acc_0_24 = q_acc_0_24 + q_tmp_0_24; + q_acc_0_25 = q_acc_0_25 + q_tmp_0_25; + q_acc_0_26 = q_acc_0_26 + q_tmp_0_26; + q_acc_0_27 = q_acc_0_27 + q_tmp_0_27; + q_acc_0_28 = q_acc_0_28 + q_tmp_0_28; + q_acc_0_29 = q_acc_0_29 + q_tmp_0_29; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_1_12 = q_acc_1_12 + q_tmp_1_12; + q_acc_1_13 = q_acc_1_13 + q_tmp_1_13; + q_acc_1_14 = q_acc_1_14 + q_tmp_1_14; + q_acc_1_15 = q_acc_1_15 + q_tmp_1_15; + q_acc_1_16 = q_acc_1_16 + q_tmp_1_16; + q_acc_1_17 = q_acc_1_17 + q_tmp_1_17; + q_acc_1_18 = q_acc_1_18 + q_tmp_1_18; + q_acc_1_19 = q_acc_1_19 + q_tmp_1_19; + q_acc_1_20 = q_acc_1_20 + q_tmp_1_20; + q_acc_1_21 = q_acc_1_21 + q_tmp_1_21; + q_acc_1_22 = q_acc_1_22 + q_tmp_1_22; + q_acc_1_23 = q_acc_1_23 + q_tmp_1_23; + q_acc_1_24 = q_acc_1_24 + q_tmp_1_24; + q_acc_1_25 = q_acc_1_25 + q_tmp_1_25; + q_acc_1_26 = q_acc_1_26 + q_tmp_1_26; + q_acc_1_27 = q_acc_1_27 + q_tmp_1_27; + q_acc_1_28 = q_acc_1_28 + q_tmp_1_28; + q_acc_1_29 = q_acc_1_29 + q_tmp_1_29; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + q_acc_2_12 = q_acc_2_12 + q_tmp_2_12; + q_acc_2_13 = q_acc_2_13 + q_tmp_2_13; + q_acc_2_14 = q_acc_2_14 + q_tmp_2_14; + q_acc_2_15 = q_acc_2_15 + q_tmp_2_15; + q_acc_2_16 = q_acc_2_16 + q_tmp_2_16; + q_acc_2_17 = q_acc_2_17 + q_tmp_2_17; + q_acc_2_18 = q_acc_2_18 + q_tmp_2_18; + q_acc_2_19 = q_acc_2_19 + q_tmp_2_19; + q_acc_2_20 = q_acc_2_20 + q_tmp_2_20; + q_acc_2_21 = q_acc_2_21 + q_tmp_2_21; + q_acc_2_22 = q_acc_2_22 + q_tmp_2_22; + q_acc_2_23 = q_acc_2_23 + q_tmp_2_23; + q_acc_2_24 = q_acc_2_24 + q_tmp_2_24; + q_acc_2_25 = q_acc_2_25 + q_tmp_2_25; + q_acc_2_26 = q_acc_2_26 + q_tmp_2_26; + q_acc_2_27 = q_acc_2_27 + q_tmp_2_27; + q_acc_2_28 = q_acc_2_28 + q_tmp_2_28; + q_acc_2_29 = q_acc_2_29 + q_tmp_2_29; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_3_6 = q_acc_3_6 + q_tmp_3_6; + q_acc_3_7 = q_acc_3_7 + q_tmp_3_7; + q_acc_3_8 = q_acc_3_8 + q_tmp_3_8; + q_acc_3_9 = q_acc_3_9 + q_tmp_3_9; + q_acc_3_10 = q_acc_3_10 + q_tmp_3_10; + q_acc_3_11 = q_acc_3_11 + q_tmp_3_11; + q_acc_3_12 = q_acc_3_12 + q_tmp_3_12; + q_acc_3_13 = q_acc_3_13 + q_tmp_3_13; + q_acc_3_14 = q_acc_3_14 + q_tmp_3_14; + q_acc_3_15 = q_acc_3_15 + q_tmp_3_15; + q_acc_3_16 = q_acc_3_16 + q_tmp_3_16; + q_acc_3_17 = q_acc_3_17 + q_tmp_3_17; + q_acc_3_18 = q_acc_3_18 + q_tmp_3_18; + q_acc_3_19 = q_acc_3_19 + q_tmp_3_19; + q_acc_3_20 = q_acc_3_20 + q_tmp_3_20; + q_acc_3_21 = q_acc_3_21 + q_tmp_3_21; + q_acc_3_22 = q_acc_3_22 + q_tmp_3_22; + q_acc_3_23 = q_acc_3_23 + q_tmp_3_23; + q_acc_3_24 = q_acc_3_24 + q_tmp_3_24; + q_acc_3_25 = q_acc_3_25 + q_tmp_3_25; + q_acc_3_26 = q_acc_3_26 + q_tmp_3_26; + q_acc_3_27 = q_acc_3_27 + q_tmp_3_27; + q_acc_3_28 = q_acc_3_28 + q_tmp_3_28; + q_acc_3_29 = q_acc_3_29 + q_tmp_3_29; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_0 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_1 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_2 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + } + } + } + const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP; + const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP; + const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP; + const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP; + const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP; + const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP; + const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP; + const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP; + const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP; + const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP; + const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP; + const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP; + const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP; + const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP; + const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP; + const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP; + const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP; + const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP; + const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP; + const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP; + const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP; + const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP; + const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP; + const real_t tmp_coords_jac_12_GREEN_UP = jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP; + const real_t tmp_coords_jac_13_GREEN_UP = 1.0 / (tmp_coords_jac_12_GREEN_UP); + const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP); + const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP); + const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP); + const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP); + const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP); + const real_t abs_det_jac_affine_GREEN_UP = abs(tmp_coords_jac_12_GREEN_UP); + { + /* CellType.GREEN_UP */ + for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) + for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 5; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_6 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_7 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_8 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_11 = _mm256_add_pd(tmp_qloop_10,tmp_qloop_9); + const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_11),tmp_qloop_3),tmp_qloop_5),tmp_qloop_7); + const __m256d tmp_qloop_13 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_14 = _mm256_add_pd(tmp_qloop_13,tmp_qloop_6); + const __m256d tmp_qloop_15 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_6),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_16 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_14),tmp_qloop_8)); + const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_13),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_8)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_6)),tmp_qloop_15),tmp_qloop_17); + const __m256d tmp_qloop_19 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_8); + const __m256d tmp_qloop_20 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_6)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_8)),tmp_qloop_17),tmp_qloop_20); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_13)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_6)),_mm256_mul_pd(rho_dof_5,tmp_qloop_8)),tmp_qloop_15),tmp_qloop_16),tmp_qloop_20); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP,jac_affine_inv_0_0_GREEN_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP,jac_affine_inv_1_0_GREEN_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP,jac_affine_inv_2_0_GREEN_UP))); + const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_1); + const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_3); + const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_5); + const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_13); + const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_29 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_6); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_11,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8); + const __m256d tmp_qloop_32 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_12),_mm256_mul_pd(rho_dof_1,tmp_qloop_24)),_mm256_mul_pd(rho_dof_2,tmp_qloop_25)),_mm256_mul_pd(rho_dof_3,tmp_qloop_26)),_mm256_mul_pd(rho_dof_4,tmp_qloop_7)),_mm256_mul_pd(rho_dof_5,tmp_qloop_10)),_mm256_mul_pd(rho_dof_6,tmp_qloop_9)),_mm256_mul_pd(rho_dof_7,tmp_qloop_27)),_mm256_mul_pd(rho_dof_8,tmp_qloop_29)),_mm256_mul_pd(rho_dof_9,tmp_qloop_31))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GREEN_UP,abs_det_jac_affine_GREEN_UP,abs_det_jac_affine_GREEN_UP,abs_det_jac_affine_GREEN_UP)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_33); + const __m256d tmp_qloop_35 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP,jac_affine_inv_0_1_GREEN_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP,jac_affine_inv_1_1_GREEN_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP,jac_affine_inv_2_1_GREEN_UP))); + const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_35); + const __m256d tmp_qloop_37 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP,jac_affine_inv_0_2_GREEN_UP)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP,jac_affine_inv_1_2_GREEN_UP))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP,jac_affine_inv_2_2_GREEN_UP))); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_37); + const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_39); + const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_41); + const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_9); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_44); + const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_39); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_41); + const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_47,tmp_qloop_9); + const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_44); + const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_39); + const __m256d tmp_qloop_51 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_41); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_9); + const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_44); + const __m256d tmp_qloop_54 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_32); + const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_8); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_55); + const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_54); + const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_4); + const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_58); + const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_58); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_34); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_34); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_34); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_34); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_7); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_34); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_9); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_34); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_34); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_34); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_36); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_36); + const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_36); + const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_36); + const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_7); + const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_36); + const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_9); + const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_36); + const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_36); + const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_36); + const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_38); + const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_38); + const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_38); + const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_38); + const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_7); + const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_38); + const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_9); + const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_38); + const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_38); + const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_38); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_40); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_40); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_40); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_40); + const __m256d q_tmp_1_4 = tmp_qloop_43; + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_42); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_45); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_40); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_40); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_40); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_46); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_46); + const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_46); + const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_46); + const __m256d q_tmp_1_14 = tmp_qloop_48; + const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_47); + const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_49); + const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_46); + const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_46); + const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_46); + const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_50); + const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_50); + const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50); + const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_50); + const __m256d q_tmp_1_24 = tmp_qloop_52; + const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_51); + const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_53); + const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_50); + const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_50); + const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_50); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_45); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_45); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_45); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_45); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_42); + const __m256d q_tmp_2_5 = tmp_qloop_43; + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_55); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_45); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_45); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_45); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_49); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_49); + const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_49); + const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_49); + const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_47); + const __m256d q_tmp_2_15 = tmp_qloop_48; + const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_56); + const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_49); + const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_49); + const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_49); + const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_53); + const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_53); + const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_53); + const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_53); + const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_51); + const __m256d q_tmp_2_25 = tmp_qloop_52; + const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_56); + const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_53); + const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_53); + const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_53); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_42); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_42); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_42); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_42); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_6); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_8); + const __m256d q_tmp_3_6 = tmp_qloop_43; + const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_42); + const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_42); + const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_42); + const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_47); + const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_47); + const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_47); + const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_47); + const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_6); + const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_8); + const __m256d q_tmp_3_16 = tmp_qloop_48; + const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_47); + const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_47); + const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_47); + const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_51); + const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_51); + const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_51); + const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_51); + const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_6,tmp_qloop_60); + const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_8); + const __m256d q_tmp_3_26 = tmp_qloop_52; + const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_51); + const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_51); + const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_51); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12); + q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13); + q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14); + q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15); + q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16); + q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17); + q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18); + q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19); + q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20); + q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21); + q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22); + q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23); + q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24); + q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25); + q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26); + q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27); + q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28); + q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12); + q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13); + q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14); + q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15); + q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16); + q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17); + q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18); + q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19); + q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20); + q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21); + q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22); + q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23); + q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24); + q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25); + q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26); + q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27); + q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28); + q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12); + q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13); + q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14); + q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15); + q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16); + q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17); + q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18); + q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19); + q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20); + q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21); + q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22); + q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23); + q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24); + q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25); + q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26); + q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27); + q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28); + q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6); + q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7); + q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8); + q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9); + q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10); + q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11); + q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12); + q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13); + q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14); + q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15); + q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16); + q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17); + q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18); + q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19); + q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20); + q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21); + q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22); + q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23); + q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24); + q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25); + q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26); + q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27); + q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28); + q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_7 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_9 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_15 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_17 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_19 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_25 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_27 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_29 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_0_12 = 0.0; + real_t q_acc_0_13 = 0.0; + real_t q_acc_0_14 = 0.0; + real_t q_acc_0_15 = 0.0; + real_t q_acc_0_16 = 0.0; + real_t q_acc_0_17 = 0.0; + real_t q_acc_0_18 = 0.0; + real_t q_acc_0_19 = 0.0; + real_t q_acc_0_20 = 0.0; + real_t q_acc_0_21 = 0.0; + real_t q_acc_0_22 = 0.0; + real_t q_acc_0_23 = 0.0; + real_t q_acc_0_24 = 0.0; + real_t q_acc_0_25 = 0.0; + real_t q_acc_0_26 = 0.0; + real_t q_acc_0_27 = 0.0; + real_t q_acc_0_28 = 0.0; + real_t q_acc_0_29 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_1_12 = 0.0; + real_t q_acc_1_13 = 0.0; + real_t q_acc_1_14 = 0.0; + real_t q_acc_1_15 = 0.0; + real_t q_acc_1_16 = 0.0; + real_t q_acc_1_17 = 0.0; + real_t q_acc_1_18 = 0.0; + real_t q_acc_1_19 = 0.0; + real_t q_acc_1_20 = 0.0; + real_t q_acc_1_21 = 0.0; + real_t q_acc_1_22 = 0.0; + real_t q_acc_1_23 = 0.0; + real_t q_acc_1_24 = 0.0; + real_t q_acc_1_25 = 0.0; + real_t q_acc_1_26 = 0.0; + real_t q_acc_1_27 = 0.0; + real_t q_acc_1_28 = 0.0; + real_t q_acc_1_29 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + real_t q_acc_2_12 = 0.0; + real_t q_acc_2_13 = 0.0; + real_t q_acc_2_14 = 0.0; + real_t q_acc_2_15 = 0.0; + real_t q_acc_2_16 = 0.0; + real_t q_acc_2_17 = 0.0; + real_t q_acc_2_18 = 0.0; + real_t q_acc_2_19 = 0.0; + real_t q_acc_2_20 = 0.0; + real_t q_acc_2_21 = 0.0; + real_t q_acc_2_22 = 0.0; + real_t q_acc_2_23 = 0.0; + real_t q_acc_2_24 = 0.0; + real_t q_acc_2_25 = 0.0; + real_t q_acc_2_26 = 0.0; + real_t q_acc_2_27 = 0.0; + real_t q_acc_2_28 = 0.0; + real_t q_acc_2_29 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_3_6 = 0.0; + real_t q_acc_3_7 = 0.0; + real_t q_acc_3_8 = 0.0; + real_t q_acc_3_9 = 0.0; + real_t q_acc_3_10 = 0.0; + real_t q_acc_3_11 = 0.0; + real_t q_acc_3_12 = 0.0; + real_t q_acc_3_13 = 0.0; + real_t q_acc_3_14 = 0.0; + real_t q_acc_3_15 = 0.0; + real_t q_acc_3_16 = 0.0; + real_t q_acc_3_17 = 0.0; + real_t q_acc_3_18 = 0.0; + real_t q_acc_3_19 = 0.0; + real_t q_acc_3_20 = 0.0; + real_t q_acc_3_21 = 0.0; + real_t q_acc_3_22 = 0.0; + real_t q_acc_3_23 = 0.0; + real_t q_acc_3_24 = 0.0; + real_t q_acc_3_25 = 0.0; + real_t q_acc_3_26 = 0.0; + real_t q_acc_3_27 = 0.0; + real_t q_acc_3_28 = 0.0; + real_t q_acc_3_29 = 0.0; + for (int64_t q = 0; q < 5; q += 1) + { + const real_t tmp_qloop_0 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_1 = tmp_qloop_0*2.0; + const real_t tmp_qloop_2 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_3 = tmp_qloop_2*2.0; + const real_t tmp_qloop_4 = (_data_q_p_2[q]*_data_q_p_2[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_7 = tmp_qloop_6*_data_q_p_2[q]; + const real_t tmp_qloop_8 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_9 = tmp_qloop_8*_data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_8*_data_q_p_2[q]; + const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_1 + tmp_qloop_11 + tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_7 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0; + const real_t tmp_qloop_13 = 4.0*_data_q_p_2[q]; + const real_t tmp_qloop_14 = tmp_qloop_13 + tmp_qloop_6; + const real_t tmp_qloop_15 = -rho_dof_8*tmp_qloop_6; + const real_t tmp_qloop_16 = rho_dof_0*(tmp_qloop_14 + tmp_qloop_8 - 3.0); + const real_t tmp_qloop_17 = -rho_dof_7*tmp_qloop_13 + tmp_qloop_16; + const real_t tmp_qloop_18 = rho_dof_1*(tmp_qloop_8 - 1.0) + rho_dof_5*tmp_qloop_13 + rho_dof_6*tmp_qloop_6 + rho_dof_9*(-tmp_qloop_14 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_15 + tmp_qloop_17; + const real_t tmp_qloop_19 = tmp_qloop_8 - 4.0; + const real_t tmp_qloop_20 = -rho_dof_9*tmp_qloop_8; + const real_t tmp_qloop_21 = rho_dof_2*(tmp_qloop_6 - 1.0) + rho_dof_4*tmp_qloop_13 + rho_dof_6*tmp_qloop_8 + rho_dof_8*(-tmp_qloop_13 - tmp_qloop_19 - 8.0*_data_q_p_1[q]) + tmp_qloop_17 + tmp_qloop_20; + const real_t tmp_qloop_22 = rho_dof_3*(tmp_qloop_13 - 1.0) + rho_dof_4*tmp_qloop_6 + rho_dof_5*tmp_qloop_8 + rho_dof_7*(-tmp_qloop_19 - tmp_qloop_6 - 8.0*_data_q_p_2[q]) + tmp_qloop_15 + tmp_qloop_16 + tmp_qloop_20; + const real_t tmp_qloop_23 = jac_affine_inv_0_0_GREEN_UP*tmp_qloop_18 + jac_affine_inv_1_0_GREEN_UP*tmp_qloop_21 + jac_affine_inv_2_0_GREEN_UP*tmp_qloop_22; + const real_t tmp_qloop_24 = tmp_qloop_1 - _data_q_p_0[q]; + const real_t tmp_qloop_25 = tmp_qloop_3 - _data_q_p_1[q]; + const real_t tmp_qloop_26 = tmp_qloop_5 - _data_q_p_2[q]; + const real_t tmp_qloop_27 = -tmp_qloop_10 + tmp_qloop_13 + tmp_qloop_4*-4.0 - tmp_qloop_7; + const real_t tmp_qloop_28 = tmp_qloop_2*4.0; + const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; + const real_t tmp_qloop_30 = tmp_qloop_0*4.0; + const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; + const real_t tmp_qloop_32 = abs_det_jac_affine_GREEN_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; + const real_t tmp_qloop_35 = jac_affine_inv_0_1_GREEN_UP*tmp_qloop_18 + jac_affine_inv_1_1_GREEN_UP*tmp_qloop_21 + jac_affine_inv_2_1_GREEN_UP*tmp_qloop_22; + const real_t tmp_qloop_36 = tmp_qloop_33*tmp_qloop_35; + const real_t tmp_qloop_37 = jac_affine_inv_0_2_GREEN_UP*tmp_qloop_18 + jac_affine_inv_1_2_GREEN_UP*tmp_qloop_21 + jac_affine_inv_2_2_GREEN_UP*tmp_qloop_22; + const real_t tmp_qloop_38 = tmp_qloop_33*tmp_qloop_37; + const real_t tmp_qloop_39 = tmp_qloop_32*_data_q_p_0[q]; + const real_t tmp_qloop_40 = tmp_qloop_23*tmp_qloop_39; + const real_t tmp_qloop_41 = tmp_qloop_32*_data_q_p_2[q]; + const real_t tmp_qloop_42 = tmp_qloop_23*tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_42*tmp_qloop_9; + const real_t tmp_qloop_44 = tmp_qloop_32*_data_q_p_1[q]; + const real_t tmp_qloop_45 = tmp_qloop_23*tmp_qloop_44; + const real_t tmp_qloop_46 = tmp_qloop_35*tmp_qloop_39; + const real_t tmp_qloop_47 = tmp_qloop_35*tmp_qloop_41; + const real_t tmp_qloop_48 = tmp_qloop_47*tmp_qloop_9; + const real_t tmp_qloop_49 = tmp_qloop_35*tmp_qloop_44; + const real_t tmp_qloop_50 = tmp_qloop_37*tmp_qloop_39; + const real_t tmp_qloop_51 = tmp_qloop_37*tmp_qloop_41; + const real_t tmp_qloop_52 = tmp_qloop_51*tmp_qloop_9; + const real_t tmp_qloop_53 = tmp_qloop_37*tmp_qloop_44; + const real_t tmp_qloop_54 = tmp_qloop_23*tmp_qloop_32; + const real_t tmp_qloop_55 = tmp_qloop_2*tmp_qloop_8; + const real_t tmp_qloop_56 = tmp_qloop_32*tmp_qloop_55; + const real_t tmp_qloop_57 = tmp_qloop_4*tmp_qloop_54; + const real_t tmp_qloop_58 = tmp_qloop_32*tmp_qloop_4; + const real_t tmp_qloop_59 = tmp_qloop_35*tmp_qloop_58; + const real_t tmp_qloop_60 = tmp_qloop_37*tmp_qloop_58; + const real_t q_tmp_0_0 = tmp_qloop_12*tmp_qloop_34; + const real_t q_tmp_0_1 = tmp_qloop_24*tmp_qloop_34; + const real_t q_tmp_0_2 = tmp_qloop_25*tmp_qloop_34; + const real_t q_tmp_0_3 = tmp_qloop_26*tmp_qloop_34; + const real_t q_tmp_0_4 = tmp_qloop_34*tmp_qloop_7; + const real_t q_tmp_0_5 = tmp_qloop_10*tmp_qloop_34; + const real_t q_tmp_0_6 = tmp_qloop_34*tmp_qloop_9; + const real_t q_tmp_0_7 = tmp_qloop_27*tmp_qloop_34; + const real_t q_tmp_0_8 = tmp_qloop_29*tmp_qloop_34; + const real_t q_tmp_0_9 = tmp_qloop_31*tmp_qloop_34; + const real_t q_tmp_0_10 = tmp_qloop_12*tmp_qloop_36; + const real_t q_tmp_0_11 = tmp_qloop_24*tmp_qloop_36; + const real_t q_tmp_0_12 = tmp_qloop_25*tmp_qloop_36; + const real_t q_tmp_0_13 = tmp_qloop_26*tmp_qloop_36; + const real_t q_tmp_0_14 = tmp_qloop_36*tmp_qloop_7; + const real_t q_tmp_0_15 = tmp_qloop_10*tmp_qloop_36; + const real_t q_tmp_0_16 = tmp_qloop_36*tmp_qloop_9; + const real_t q_tmp_0_17 = tmp_qloop_27*tmp_qloop_36; + const real_t q_tmp_0_18 = tmp_qloop_29*tmp_qloop_36; + const real_t q_tmp_0_19 = tmp_qloop_31*tmp_qloop_36; + const real_t q_tmp_0_20 = tmp_qloop_12*tmp_qloop_38; + const real_t q_tmp_0_21 = tmp_qloop_24*tmp_qloop_38; + const real_t q_tmp_0_22 = tmp_qloop_25*tmp_qloop_38; + const real_t q_tmp_0_23 = tmp_qloop_26*tmp_qloop_38; + const real_t q_tmp_0_24 = tmp_qloop_38*tmp_qloop_7; + const real_t q_tmp_0_25 = tmp_qloop_10*tmp_qloop_38; + const real_t q_tmp_0_26 = tmp_qloop_38*tmp_qloop_9; + const real_t q_tmp_0_27 = tmp_qloop_27*tmp_qloop_38; + const real_t q_tmp_0_28 = tmp_qloop_29*tmp_qloop_38; + const real_t q_tmp_0_29 = tmp_qloop_31*tmp_qloop_38; + const real_t q_tmp_1_0 = tmp_qloop_12*tmp_qloop_40; + const real_t q_tmp_1_1 = tmp_qloop_24*tmp_qloop_40; + const real_t q_tmp_1_2 = tmp_qloop_25*tmp_qloop_40; + const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_40; + const real_t q_tmp_1_4 = tmp_qloop_43; + const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_42; + const real_t q_tmp_1_6 = tmp_qloop_30*tmp_qloop_45; + const real_t q_tmp_1_7 = tmp_qloop_27*tmp_qloop_40; + const real_t q_tmp_1_8 = tmp_qloop_29*tmp_qloop_40; + const real_t q_tmp_1_9 = tmp_qloop_31*tmp_qloop_40; + const real_t q_tmp_1_10 = tmp_qloop_12*tmp_qloop_46; + const real_t q_tmp_1_11 = tmp_qloop_24*tmp_qloop_46; + const real_t q_tmp_1_12 = tmp_qloop_25*tmp_qloop_46; + const real_t q_tmp_1_13 = tmp_qloop_26*tmp_qloop_46; + const real_t q_tmp_1_14 = tmp_qloop_48; + const real_t q_tmp_1_15 = tmp_qloop_30*tmp_qloop_47; + const real_t q_tmp_1_16 = tmp_qloop_30*tmp_qloop_49; + const real_t q_tmp_1_17 = tmp_qloop_27*tmp_qloop_46; + const real_t q_tmp_1_18 = tmp_qloop_29*tmp_qloop_46; + const real_t q_tmp_1_19 = tmp_qloop_31*tmp_qloop_46; + const real_t q_tmp_1_20 = tmp_qloop_12*tmp_qloop_50; + const real_t q_tmp_1_21 = tmp_qloop_24*tmp_qloop_50; + const real_t q_tmp_1_22 = tmp_qloop_25*tmp_qloop_50; + const real_t q_tmp_1_23 = tmp_qloop_26*tmp_qloop_50; + const real_t q_tmp_1_24 = tmp_qloop_52; + const real_t q_tmp_1_25 = tmp_qloop_30*tmp_qloop_51; + const real_t q_tmp_1_26 = tmp_qloop_30*tmp_qloop_53; + const real_t q_tmp_1_27 = tmp_qloop_27*tmp_qloop_50; + const real_t q_tmp_1_28 = tmp_qloop_29*tmp_qloop_50; + const real_t q_tmp_1_29 = tmp_qloop_31*tmp_qloop_50; + const real_t q_tmp_2_0 = tmp_qloop_12*tmp_qloop_45; + const real_t q_tmp_2_1 = tmp_qloop_24*tmp_qloop_45; + const real_t q_tmp_2_2 = tmp_qloop_25*tmp_qloop_45; + const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_45; + const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_42; + const real_t q_tmp_2_5 = tmp_qloop_43; + const real_t q_tmp_2_6 = tmp_qloop_54*tmp_qloop_55; + const real_t q_tmp_2_7 = tmp_qloop_27*tmp_qloop_45; + const real_t q_tmp_2_8 = tmp_qloop_29*tmp_qloop_45; + const real_t q_tmp_2_9 = tmp_qloop_31*tmp_qloop_45; + const real_t q_tmp_2_10 = tmp_qloop_12*tmp_qloop_49; + const real_t q_tmp_2_11 = tmp_qloop_24*tmp_qloop_49; + const real_t q_tmp_2_12 = tmp_qloop_25*tmp_qloop_49; + const real_t q_tmp_2_13 = tmp_qloop_26*tmp_qloop_49; + const real_t q_tmp_2_14 = tmp_qloop_28*tmp_qloop_47; + const real_t q_tmp_2_15 = tmp_qloop_48; + const real_t q_tmp_2_16 = tmp_qloop_35*tmp_qloop_56; + const real_t q_tmp_2_17 = tmp_qloop_27*tmp_qloop_49; + const real_t q_tmp_2_18 = tmp_qloop_29*tmp_qloop_49; + const real_t q_tmp_2_19 = tmp_qloop_31*tmp_qloop_49; + const real_t q_tmp_2_20 = tmp_qloop_12*tmp_qloop_53; + const real_t q_tmp_2_21 = tmp_qloop_24*tmp_qloop_53; + const real_t q_tmp_2_22 = tmp_qloop_25*tmp_qloop_53; + const real_t q_tmp_2_23 = tmp_qloop_26*tmp_qloop_53; + const real_t q_tmp_2_24 = tmp_qloop_28*tmp_qloop_51; + const real_t q_tmp_2_25 = tmp_qloop_52; + const real_t q_tmp_2_26 = tmp_qloop_37*tmp_qloop_56; + const real_t q_tmp_2_27 = tmp_qloop_27*tmp_qloop_53; + const real_t q_tmp_2_28 = tmp_qloop_29*tmp_qloop_53; + const real_t q_tmp_2_29 = tmp_qloop_31*tmp_qloop_53; + const real_t q_tmp_3_0 = tmp_qloop_12*tmp_qloop_42; + const real_t q_tmp_3_1 = tmp_qloop_24*tmp_qloop_42; + const real_t q_tmp_3_2 = tmp_qloop_25*tmp_qloop_42; + const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_42; + const real_t q_tmp_3_4 = tmp_qloop_57*tmp_qloop_6; + const real_t q_tmp_3_5 = tmp_qloop_57*tmp_qloop_8; + const real_t q_tmp_3_6 = tmp_qloop_43; + const real_t q_tmp_3_7 = tmp_qloop_27*tmp_qloop_42; + const real_t q_tmp_3_8 = tmp_qloop_29*tmp_qloop_42; + const real_t q_tmp_3_9 = tmp_qloop_31*tmp_qloop_42; + const real_t q_tmp_3_10 = tmp_qloop_12*tmp_qloop_47; + const real_t q_tmp_3_11 = tmp_qloop_24*tmp_qloop_47; + const real_t q_tmp_3_12 = tmp_qloop_25*tmp_qloop_47; + const real_t q_tmp_3_13 = tmp_qloop_26*tmp_qloop_47; + const real_t q_tmp_3_14 = tmp_qloop_59*tmp_qloop_6; + const real_t q_tmp_3_15 = tmp_qloop_59*tmp_qloop_8; + const real_t q_tmp_3_16 = tmp_qloop_48; + const real_t q_tmp_3_17 = tmp_qloop_27*tmp_qloop_47; + const real_t q_tmp_3_18 = tmp_qloop_29*tmp_qloop_47; + const real_t q_tmp_3_19 = tmp_qloop_31*tmp_qloop_47; + const real_t q_tmp_3_20 = tmp_qloop_12*tmp_qloop_51; + const real_t q_tmp_3_21 = tmp_qloop_24*tmp_qloop_51; + const real_t q_tmp_3_22 = tmp_qloop_25*tmp_qloop_51; + const real_t q_tmp_3_23 = tmp_qloop_26*tmp_qloop_51; + const real_t q_tmp_3_24 = tmp_qloop_6*tmp_qloop_60; + const real_t q_tmp_3_25 = tmp_qloop_60*tmp_qloop_8; + const real_t q_tmp_3_26 = tmp_qloop_52; + const real_t q_tmp_3_27 = tmp_qloop_27*tmp_qloop_51; + const real_t q_tmp_3_28 = tmp_qloop_29*tmp_qloop_51; + const real_t q_tmp_3_29 = tmp_qloop_31*tmp_qloop_51; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_0_12 = q_acc_0_12 + q_tmp_0_12; + q_acc_0_13 = q_acc_0_13 + q_tmp_0_13; + q_acc_0_14 = q_acc_0_14 + q_tmp_0_14; + q_acc_0_15 = q_acc_0_15 + q_tmp_0_15; + q_acc_0_16 = q_acc_0_16 + q_tmp_0_16; + q_acc_0_17 = q_acc_0_17 + q_tmp_0_17; + q_acc_0_18 = q_acc_0_18 + q_tmp_0_18; + q_acc_0_19 = q_acc_0_19 + q_tmp_0_19; + q_acc_0_20 = q_acc_0_20 + q_tmp_0_20; + q_acc_0_21 = q_acc_0_21 + q_tmp_0_21; + q_acc_0_22 = q_acc_0_22 + q_tmp_0_22; + q_acc_0_23 = q_acc_0_23 + q_tmp_0_23; + q_acc_0_24 = q_acc_0_24 + q_tmp_0_24; + q_acc_0_25 = q_acc_0_25 + q_tmp_0_25; + q_acc_0_26 = q_acc_0_26 + q_tmp_0_26; + q_acc_0_27 = q_acc_0_27 + q_tmp_0_27; + q_acc_0_28 = q_acc_0_28 + q_tmp_0_28; + q_acc_0_29 = q_acc_0_29 + q_tmp_0_29; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_1_12 = q_acc_1_12 + q_tmp_1_12; + q_acc_1_13 = q_acc_1_13 + q_tmp_1_13; + q_acc_1_14 = q_acc_1_14 + q_tmp_1_14; + q_acc_1_15 = q_acc_1_15 + q_tmp_1_15; + q_acc_1_16 = q_acc_1_16 + q_tmp_1_16; + q_acc_1_17 = q_acc_1_17 + q_tmp_1_17; + q_acc_1_18 = q_acc_1_18 + q_tmp_1_18; + q_acc_1_19 = q_acc_1_19 + q_tmp_1_19; + q_acc_1_20 = q_acc_1_20 + q_tmp_1_20; + q_acc_1_21 = q_acc_1_21 + q_tmp_1_21; + q_acc_1_22 = q_acc_1_22 + q_tmp_1_22; + q_acc_1_23 = q_acc_1_23 + q_tmp_1_23; + q_acc_1_24 = q_acc_1_24 + q_tmp_1_24; + q_acc_1_25 = q_acc_1_25 + q_tmp_1_25; + q_acc_1_26 = q_acc_1_26 + q_tmp_1_26; + q_acc_1_27 = q_acc_1_27 + q_tmp_1_27; + q_acc_1_28 = q_acc_1_28 + q_tmp_1_28; + q_acc_1_29 = q_acc_1_29 + q_tmp_1_29; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + q_acc_2_12 = q_acc_2_12 + q_tmp_2_12; + q_acc_2_13 = q_acc_2_13 + q_tmp_2_13; + q_acc_2_14 = q_acc_2_14 + q_tmp_2_14; + q_acc_2_15 = q_acc_2_15 + q_tmp_2_15; + q_acc_2_16 = q_acc_2_16 + q_tmp_2_16; + q_acc_2_17 = q_acc_2_17 + q_tmp_2_17; + q_acc_2_18 = q_acc_2_18 + q_tmp_2_18; + q_acc_2_19 = q_acc_2_19 + q_tmp_2_19; + q_acc_2_20 = q_acc_2_20 + q_tmp_2_20; + q_acc_2_21 = q_acc_2_21 + q_tmp_2_21; + q_acc_2_22 = q_acc_2_22 + q_tmp_2_22; + q_acc_2_23 = q_acc_2_23 + q_tmp_2_23; + q_acc_2_24 = q_acc_2_24 + q_tmp_2_24; + q_acc_2_25 = q_acc_2_25 + q_tmp_2_25; + q_acc_2_26 = q_acc_2_26 + q_tmp_2_26; + q_acc_2_27 = q_acc_2_27 + q_tmp_2_27; + q_acc_2_28 = q_acc_2_28 + q_tmp_2_28; + q_acc_2_29 = q_acc_2_29 + q_tmp_2_29; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_3_6 = q_acc_3_6 + q_tmp_3_6; + q_acc_3_7 = q_acc_3_7 + q_tmp_3_7; + q_acc_3_8 = q_acc_3_8 + q_tmp_3_8; + q_acc_3_9 = q_acc_3_9 + q_tmp_3_9; + q_acc_3_10 = q_acc_3_10 + q_tmp_3_10; + q_acc_3_11 = q_acc_3_11 + q_tmp_3_11; + q_acc_3_12 = q_acc_3_12 + q_tmp_3_12; + q_acc_3_13 = q_acc_3_13 + q_tmp_3_13; + q_acc_3_14 = q_acc_3_14 + q_tmp_3_14; + q_acc_3_15 = q_acc_3_15 + q_tmp_3_15; + q_acc_3_16 = q_acc_3_16 + q_tmp_3_16; + q_acc_3_17 = q_acc_3_17 + q_tmp_3_17; + q_acc_3_18 = q_acc_3_18 + q_tmp_3_18; + q_acc_3_19 = q_acc_3_19 + q_tmp_3_19; + q_acc_3_20 = q_acc_3_20 + q_tmp_3_20; + q_acc_3_21 = q_acc_3_21 + q_tmp_3_21; + q_acc_3_22 = q_acc_3_22 + q_tmp_3_22; + q_acc_3_23 = q_acc_3_23 + q_tmp_3_23; + q_acc_3_24 = q_acc_3_24 + q_tmp_3_24; + q_acc_3_25 = q_acc_3_25 + q_tmp_3_25; + q_acc_3_26 = q_acc_3_26 + q_tmp_3_26; + q_acc_3_27 = q_acc_3_27 + q_tmp_3_27; + q_acc_3_28 = q_acc_3_28 + q_tmp_3_28; + q_acc_3_29 = q_acc_3_29 + q_tmp_3_29; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_0 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_2 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + } + } + } + const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN; + const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN; + const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN; + const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN; + const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN; + const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN; + const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; + const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; + const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; + const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; + const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; + const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; + const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN; + const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN; + const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN; + const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN; + const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN; + const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN; + const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN; + const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN; + const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN; + const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN; + const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN; + const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_15_GREEN_DOWN = jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN; + const real_t tmp_coords_jac_16_GREEN_DOWN = 1.0 / (tmp_coords_jac_15_GREEN_DOWN); + const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN); + const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN); + const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN); + const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); + const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); + const real_t abs_det_jac_affine_GREEN_DOWN = abs(tmp_coords_jac_15_GREEN_DOWN); + { + /* CellType.GREEN_DOWN */ + for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) + for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_0_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d p_affine_3_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0,macro_vertex_coord_id_3comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_3_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1,macro_vertex_coord_id_3comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_3_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2,macro_vertex_coord_id_1comp2)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2,macro_vertex_coord_id_2comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)),_mm256_set_pd(macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2,macro_vertex_coord_id_3comp2)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2,macro_vertex_coord_id_0comp2)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_6 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_7 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_8 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_9 = _mm256_loadu_pd(& _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_10 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_11 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_12 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_13 = _mm256_loadu_pd(& _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_14 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_15 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_16 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_17 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_18 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_19 = _mm256_loadu_pd(& _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_20 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_21 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_22 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d src_dof_23 = _mm256_loadu_pd(& _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d src_dof_24 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_25 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_26 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d src_dof_27 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d src_dof_28 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d src_dof_29 = _mm256_loadu_pd(& _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_0 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_1 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_2 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]); + const __m256d rho_dof_3 = _mm256_loadu_pd(& _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]); + const __m256d rho_dof_4 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_5 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_6 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]); + const __m256d rho_dof_7 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + const __m256d rho_dof_8 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]); + const __m256d rho_dof_9 = _mm256_loadu_pd(& _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_6 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_7 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_8 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_9 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_10 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_11 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_12 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_13 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_14 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_15 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_16 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_17 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_18 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_19 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_20 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_21 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_22 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_23 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_24 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_25 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_26 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_27 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_28 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_29 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 5; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_6 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_7 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_8 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_9 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_11 = _mm256_add_pd(tmp_qloop_10,tmp_qloop_9); + const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_11),tmp_qloop_3),tmp_qloop_5),tmp_qloop_7); + const __m256d tmp_qloop_13 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_14 = _mm256_add_pd(tmp_qloop_13,tmp_qloop_6); + const __m256d tmp_qloop_15 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_8,tmp_qloop_6),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_16 = _mm256_mul_pd(rho_dof_0,_mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_14),tmp_qloop_8)); + const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(rho_dof_7,tmp_qloop_13),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_1,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_8)),_mm256_mul_pd(rho_dof_9,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]))),_mm256_set_pd(4.0,4.0,4.0,4.0)))),_mm256_mul_pd(rho_dof_5,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_6)),tmp_qloop_15),tmp_qloop_17); + const __m256d tmp_qloop_19 = _mm256_add_pd(_mm256_set_pd(-4.0,-4.0,-4.0,-4.0),tmp_qloop_8); + const __m256d tmp_qloop_20 = _mm256_mul_pd(_mm256_mul_pd(rho_dof_9,tmp_qloop_8),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_2,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_6)),_mm256_mul_pd(rho_dof_8,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_13)),_mm256_mul_pd(rho_dof_6,tmp_qloop_8)),tmp_qloop_17),tmp_qloop_20); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_3,_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_13)),_mm256_mul_pd(rho_dof_7,_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))))),_mm256_mul_pd(rho_dof_4,tmp_qloop_6)),_mm256_mul_pd(rho_dof_5,tmp_qloop_8)),tmp_qloop_15),tmp_qloop_16),tmp_qloop_20); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN,jac_affine_inv_0_0_GREEN_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN,jac_affine_inv_1_0_GREEN_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN,jac_affine_inv_2_0_GREEN_DOWN))); + const __m256d tmp_qloop_24 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_1); + const __m256d tmp_qloop_25 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_3); + const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])),tmp_qloop_5); + const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0))),tmp_qloop_13); + const __m256d tmp_qloop_28 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_29 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_6); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_11,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8); + const __m256d tmp_qloop_32 = _mm256_mul_pd(_mm256_mul_pd(_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(rho_dof_0,tmp_qloop_12),_mm256_mul_pd(rho_dof_1,tmp_qloop_24)),_mm256_mul_pd(rho_dof_2,tmp_qloop_25)),_mm256_mul_pd(rho_dof_3,tmp_qloop_26)),_mm256_mul_pd(rho_dof_4,tmp_qloop_7)),_mm256_mul_pd(rho_dof_5,tmp_qloop_10)),_mm256_mul_pd(rho_dof_6,tmp_qloop_9)),_mm256_mul_pd(rho_dof_7,tmp_qloop_27)),_mm256_mul_pd(rho_dof_8,tmp_qloop_29)),_mm256_mul_pd(rho_dof_9,tmp_qloop_31))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GREEN_DOWN,abs_det_jac_affine_GREEN_DOWN,abs_det_jac_affine_GREEN_DOWN,abs_det_jac_affine_GREEN_DOWN)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_32,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_33); + const __m256d tmp_qloop_35 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN,jac_affine_inv_0_1_GREEN_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN,jac_affine_inv_1_1_GREEN_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN,jac_affine_inv_2_1_GREEN_DOWN))); + const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_35); + const __m256d tmp_qloop_37 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN,jac_affine_inv_0_2_GREEN_DOWN)),_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN,jac_affine_inv_1_2_GREEN_DOWN))),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN,jac_affine_inv_2_2_GREEN_DOWN))); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_33,tmp_qloop_37); + const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_39); + const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q],_data_q_p_2[q])); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_41); + const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_9); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_44); + const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_39); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_41); + const __m256d tmp_qloop_48 = _mm256_mul_pd(tmp_qloop_47,tmp_qloop_9); + const __m256d tmp_qloop_49 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_44); + const __m256d tmp_qloop_50 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_39); + const __m256d tmp_qloop_51 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_41); + const __m256d tmp_qloop_52 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_9); + const __m256d tmp_qloop_53 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_44); + const __m256d tmp_qloop_54 = _mm256_mul_pd(tmp_qloop_23,tmp_qloop_32); + const __m256d tmp_qloop_55 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_8); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_55); + const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_4,tmp_qloop_54); + const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_32,tmp_qloop_4); + const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_58); + const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_58); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_34); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_34); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_34); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_34); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_7); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_34); + const __m256d q_tmp_0_6 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_9); + const __m256d q_tmp_0_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_34); + const __m256d q_tmp_0_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_34); + const __m256d q_tmp_0_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_34); + const __m256d q_tmp_0_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_36); + const __m256d q_tmp_0_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_36); + const __m256d q_tmp_0_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_36); + const __m256d q_tmp_0_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_36); + const __m256d q_tmp_0_14 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_7); + const __m256d q_tmp_0_15 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_36); + const __m256d q_tmp_0_16 = _mm256_mul_pd(tmp_qloop_36,tmp_qloop_9); + const __m256d q_tmp_0_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_36); + const __m256d q_tmp_0_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_36); + const __m256d q_tmp_0_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_36); + const __m256d q_tmp_0_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_38); + const __m256d q_tmp_0_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_38); + const __m256d q_tmp_0_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_38); + const __m256d q_tmp_0_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_38); + const __m256d q_tmp_0_24 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_7); + const __m256d q_tmp_0_25 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_38); + const __m256d q_tmp_0_26 = _mm256_mul_pd(tmp_qloop_38,tmp_qloop_9); + const __m256d q_tmp_0_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_38); + const __m256d q_tmp_0_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_38); + const __m256d q_tmp_0_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_38); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_40); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_40); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_40); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_40); + const __m256d q_tmp_1_4 = tmp_qloop_43; + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_42); + const __m256d q_tmp_1_6 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_45); + const __m256d q_tmp_1_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_40); + const __m256d q_tmp_1_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_40); + const __m256d q_tmp_1_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_40); + const __m256d q_tmp_1_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_46); + const __m256d q_tmp_1_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_46); + const __m256d q_tmp_1_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_46); + const __m256d q_tmp_1_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_46); + const __m256d q_tmp_1_14 = tmp_qloop_48; + const __m256d q_tmp_1_15 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_47); + const __m256d q_tmp_1_16 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_49); + const __m256d q_tmp_1_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_46); + const __m256d q_tmp_1_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_46); + const __m256d q_tmp_1_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_46); + const __m256d q_tmp_1_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_50); + const __m256d q_tmp_1_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_50); + const __m256d q_tmp_1_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_50); + const __m256d q_tmp_1_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_50); + const __m256d q_tmp_1_24 = tmp_qloop_52; + const __m256d q_tmp_1_25 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_51); + const __m256d q_tmp_1_26 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_53); + const __m256d q_tmp_1_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_50); + const __m256d q_tmp_1_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_50); + const __m256d q_tmp_1_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_50); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_45); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_45); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_45); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_45); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_42); + const __m256d q_tmp_2_5 = tmp_qloop_43; + const __m256d q_tmp_2_6 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_55); + const __m256d q_tmp_2_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_45); + const __m256d q_tmp_2_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_45); + const __m256d q_tmp_2_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_45); + const __m256d q_tmp_2_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_49); + const __m256d q_tmp_2_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_49); + const __m256d q_tmp_2_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_49); + const __m256d q_tmp_2_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_49); + const __m256d q_tmp_2_14 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_47); + const __m256d q_tmp_2_15 = tmp_qloop_48; + const __m256d q_tmp_2_16 = _mm256_mul_pd(tmp_qloop_35,tmp_qloop_56); + const __m256d q_tmp_2_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_49); + const __m256d q_tmp_2_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_49); + const __m256d q_tmp_2_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_49); + const __m256d q_tmp_2_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_53); + const __m256d q_tmp_2_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_53); + const __m256d q_tmp_2_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_53); + const __m256d q_tmp_2_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_53); + const __m256d q_tmp_2_24 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_51); + const __m256d q_tmp_2_25 = tmp_qloop_52; + const __m256d q_tmp_2_26 = _mm256_mul_pd(tmp_qloop_37,tmp_qloop_56); + const __m256d q_tmp_2_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_53); + const __m256d q_tmp_2_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_53); + const __m256d q_tmp_2_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_53); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_42); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_42); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_42); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_42); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_6); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_57,tmp_qloop_8); + const __m256d q_tmp_3_6 = tmp_qloop_43; + const __m256d q_tmp_3_7 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_42); + const __m256d q_tmp_3_8 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_42); + const __m256d q_tmp_3_9 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_42); + const __m256d q_tmp_3_10 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_47); + const __m256d q_tmp_3_11 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_47); + const __m256d q_tmp_3_12 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_47); + const __m256d q_tmp_3_13 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_47); + const __m256d q_tmp_3_14 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_6); + const __m256d q_tmp_3_15 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_8); + const __m256d q_tmp_3_16 = tmp_qloop_48; + const __m256d q_tmp_3_17 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_47); + const __m256d q_tmp_3_18 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_47); + const __m256d q_tmp_3_19 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_47); + const __m256d q_tmp_3_20 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_51); + const __m256d q_tmp_3_21 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_51); + const __m256d q_tmp_3_22 = _mm256_mul_pd(tmp_qloop_25,tmp_qloop_51); + const __m256d q_tmp_3_23 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_51); + const __m256d q_tmp_3_24 = _mm256_mul_pd(tmp_qloop_6,tmp_qloop_60); + const __m256d q_tmp_3_25 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_8); + const __m256d q_tmp_3_26 = tmp_qloop_52; + const __m256d q_tmp_3_27 = _mm256_mul_pd(tmp_qloop_27,tmp_qloop_51); + const __m256d q_tmp_3_28 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_51); + const __m256d q_tmp_3_29 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_51); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_0_6 = _mm256_add_pd(q_acc_0_6,q_tmp_0_6); + q_acc_0_7 = _mm256_add_pd(q_acc_0_7,q_tmp_0_7); + q_acc_0_8 = _mm256_add_pd(q_acc_0_8,q_tmp_0_8); + q_acc_0_9 = _mm256_add_pd(q_acc_0_9,q_tmp_0_9); + q_acc_0_10 = _mm256_add_pd(q_acc_0_10,q_tmp_0_10); + q_acc_0_11 = _mm256_add_pd(q_acc_0_11,q_tmp_0_11); + q_acc_0_12 = _mm256_add_pd(q_acc_0_12,q_tmp_0_12); + q_acc_0_13 = _mm256_add_pd(q_acc_0_13,q_tmp_0_13); + q_acc_0_14 = _mm256_add_pd(q_acc_0_14,q_tmp_0_14); + q_acc_0_15 = _mm256_add_pd(q_acc_0_15,q_tmp_0_15); + q_acc_0_16 = _mm256_add_pd(q_acc_0_16,q_tmp_0_16); + q_acc_0_17 = _mm256_add_pd(q_acc_0_17,q_tmp_0_17); + q_acc_0_18 = _mm256_add_pd(q_acc_0_18,q_tmp_0_18); + q_acc_0_19 = _mm256_add_pd(q_acc_0_19,q_tmp_0_19); + q_acc_0_20 = _mm256_add_pd(q_acc_0_20,q_tmp_0_20); + q_acc_0_21 = _mm256_add_pd(q_acc_0_21,q_tmp_0_21); + q_acc_0_22 = _mm256_add_pd(q_acc_0_22,q_tmp_0_22); + q_acc_0_23 = _mm256_add_pd(q_acc_0_23,q_tmp_0_23); + q_acc_0_24 = _mm256_add_pd(q_acc_0_24,q_tmp_0_24); + q_acc_0_25 = _mm256_add_pd(q_acc_0_25,q_tmp_0_25); + q_acc_0_26 = _mm256_add_pd(q_acc_0_26,q_tmp_0_26); + q_acc_0_27 = _mm256_add_pd(q_acc_0_27,q_tmp_0_27); + q_acc_0_28 = _mm256_add_pd(q_acc_0_28,q_tmp_0_28); + q_acc_0_29 = _mm256_add_pd(q_acc_0_29,q_tmp_0_29); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_1_6 = _mm256_add_pd(q_acc_1_6,q_tmp_1_6); + q_acc_1_7 = _mm256_add_pd(q_acc_1_7,q_tmp_1_7); + q_acc_1_8 = _mm256_add_pd(q_acc_1_8,q_tmp_1_8); + q_acc_1_9 = _mm256_add_pd(q_acc_1_9,q_tmp_1_9); + q_acc_1_10 = _mm256_add_pd(q_acc_1_10,q_tmp_1_10); + q_acc_1_11 = _mm256_add_pd(q_acc_1_11,q_tmp_1_11); + q_acc_1_12 = _mm256_add_pd(q_acc_1_12,q_tmp_1_12); + q_acc_1_13 = _mm256_add_pd(q_acc_1_13,q_tmp_1_13); + q_acc_1_14 = _mm256_add_pd(q_acc_1_14,q_tmp_1_14); + q_acc_1_15 = _mm256_add_pd(q_acc_1_15,q_tmp_1_15); + q_acc_1_16 = _mm256_add_pd(q_acc_1_16,q_tmp_1_16); + q_acc_1_17 = _mm256_add_pd(q_acc_1_17,q_tmp_1_17); + q_acc_1_18 = _mm256_add_pd(q_acc_1_18,q_tmp_1_18); + q_acc_1_19 = _mm256_add_pd(q_acc_1_19,q_tmp_1_19); + q_acc_1_20 = _mm256_add_pd(q_acc_1_20,q_tmp_1_20); + q_acc_1_21 = _mm256_add_pd(q_acc_1_21,q_tmp_1_21); + q_acc_1_22 = _mm256_add_pd(q_acc_1_22,q_tmp_1_22); + q_acc_1_23 = _mm256_add_pd(q_acc_1_23,q_tmp_1_23); + q_acc_1_24 = _mm256_add_pd(q_acc_1_24,q_tmp_1_24); + q_acc_1_25 = _mm256_add_pd(q_acc_1_25,q_tmp_1_25); + q_acc_1_26 = _mm256_add_pd(q_acc_1_26,q_tmp_1_26); + q_acc_1_27 = _mm256_add_pd(q_acc_1_27,q_tmp_1_27); + q_acc_1_28 = _mm256_add_pd(q_acc_1_28,q_tmp_1_28); + q_acc_1_29 = _mm256_add_pd(q_acc_1_29,q_tmp_1_29); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_2_6 = _mm256_add_pd(q_acc_2_6,q_tmp_2_6); + q_acc_2_7 = _mm256_add_pd(q_acc_2_7,q_tmp_2_7); + q_acc_2_8 = _mm256_add_pd(q_acc_2_8,q_tmp_2_8); + q_acc_2_9 = _mm256_add_pd(q_acc_2_9,q_tmp_2_9); + q_acc_2_10 = _mm256_add_pd(q_acc_2_10,q_tmp_2_10); + q_acc_2_11 = _mm256_add_pd(q_acc_2_11,q_tmp_2_11); + q_acc_2_12 = _mm256_add_pd(q_acc_2_12,q_tmp_2_12); + q_acc_2_13 = _mm256_add_pd(q_acc_2_13,q_tmp_2_13); + q_acc_2_14 = _mm256_add_pd(q_acc_2_14,q_tmp_2_14); + q_acc_2_15 = _mm256_add_pd(q_acc_2_15,q_tmp_2_15); + q_acc_2_16 = _mm256_add_pd(q_acc_2_16,q_tmp_2_16); + q_acc_2_17 = _mm256_add_pd(q_acc_2_17,q_tmp_2_17); + q_acc_2_18 = _mm256_add_pd(q_acc_2_18,q_tmp_2_18); + q_acc_2_19 = _mm256_add_pd(q_acc_2_19,q_tmp_2_19); + q_acc_2_20 = _mm256_add_pd(q_acc_2_20,q_tmp_2_20); + q_acc_2_21 = _mm256_add_pd(q_acc_2_21,q_tmp_2_21); + q_acc_2_22 = _mm256_add_pd(q_acc_2_22,q_tmp_2_22); + q_acc_2_23 = _mm256_add_pd(q_acc_2_23,q_tmp_2_23); + q_acc_2_24 = _mm256_add_pd(q_acc_2_24,q_tmp_2_24); + q_acc_2_25 = _mm256_add_pd(q_acc_2_25,q_tmp_2_25); + q_acc_2_26 = _mm256_add_pd(q_acc_2_26,q_tmp_2_26); + q_acc_2_27 = _mm256_add_pd(q_acc_2_27,q_tmp_2_27); + q_acc_2_28 = _mm256_add_pd(q_acc_2_28,q_tmp_2_28); + q_acc_2_29 = _mm256_add_pd(q_acc_2_29,q_tmp_2_29); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_3_6 = _mm256_add_pd(q_acc_3_6,q_tmp_3_6); + q_acc_3_7 = _mm256_add_pd(q_acc_3_7,q_tmp_3_7); + q_acc_3_8 = _mm256_add_pd(q_acc_3_8,q_tmp_3_8); + q_acc_3_9 = _mm256_add_pd(q_acc_3_9,q_tmp_3_9); + q_acc_3_10 = _mm256_add_pd(q_acc_3_10,q_tmp_3_10); + q_acc_3_11 = _mm256_add_pd(q_acc_3_11,q_tmp_3_11); + q_acc_3_12 = _mm256_add_pd(q_acc_3_12,q_tmp_3_12); + q_acc_3_13 = _mm256_add_pd(q_acc_3_13,q_tmp_3_13); + q_acc_3_14 = _mm256_add_pd(q_acc_3_14,q_tmp_3_14); + q_acc_3_15 = _mm256_add_pd(q_acc_3_15,q_tmp_3_15); + q_acc_3_16 = _mm256_add_pd(q_acc_3_16,q_tmp_3_16); + q_acc_3_17 = _mm256_add_pd(q_acc_3_17,q_tmp_3_17); + q_acc_3_18 = _mm256_add_pd(q_acc_3_18,q_tmp_3_18); + q_acc_3_19 = _mm256_add_pd(q_acc_3_19,q_tmp_3_19); + q_acc_3_20 = _mm256_add_pd(q_acc_3_20,q_tmp_3_20); + q_acc_3_21 = _mm256_add_pd(q_acc_3_21,q_tmp_3_21); + q_acc_3_22 = _mm256_add_pd(q_acc_3_22,q_tmp_3_22); + q_acc_3_23 = _mm256_add_pd(q_acc_3_23,q_tmp_3_23); + q_acc_3_24 = _mm256_add_pd(q_acc_3_24,q_tmp_3_24); + q_acc_3_25 = _mm256_add_pd(q_acc_3_25,q_tmp_3_25); + q_acc_3_26 = _mm256_add_pd(q_acc_3_26,q_tmp_3_26); + q_acc_3_27 = _mm256_add_pd(q_acc_3_27,q_tmp_3_27); + q_acc_3_28 = _mm256_add_pd(q_acc_3_28,q_tmp_3_28); + q_acc_3_29 = _mm256_add_pd(q_acc_3_29,q_tmp_3_29); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_10,src_dof_10)),_mm256_mul_pd(q_acc_0_11,src_dof_11)),_mm256_mul_pd(q_acc_0_12,src_dof_12)),_mm256_mul_pd(q_acc_0_13,src_dof_13)),_mm256_mul_pd(q_acc_0_14,src_dof_14)),_mm256_mul_pd(q_acc_0_15,src_dof_15)),_mm256_mul_pd(q_acc_0_16,src_dof_16)),_mm256_mul_pd(q_acc_0_17,src_dof_17)),_mm256_mul_pd(q_acc_0_18,src_dof_18)),_mm256_mul_pd(q_acc_0_19,src_dof_19)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_20,src_dof_20)),_mm256_mul_pd(q_acc_0_21,src_dof_21)),_mm256_mul_pd(q_acc_0_22,src_dof_22)),_mm256_mul_pd(q_acc_0_23,src_dof_23)),_mm256_mul_pd(q_acc_0_24,src_dof_24)),_mm256_mul_pd(q_acc_0_25,src_dof_25)),_mm256_mul_pd(q_acc_0_26,src_dof_26)),_mm256_mul_pd(q_acc_0_27,src_dof_27)),_mm256_mul_pd(q_acc_0_28,src_dof_28)),_mm256_mul_pd(q_acc_0_29,src_dof_29)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)),_mm256_mul_pd(q_acc_0_6,src_dof_6)),_mm256_mul_pd(q_acc_0_7,src_dof_7)),_mm256_mul_pd(q_acc_0_8,src_dof_8)),_mm256_mul_pd(q_acc_0_9,src_dof_9)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_10,src_dof_10)),_mm256_mul_pd(q_acc_1_11,src_dof_11)),_mm256_mul_pd(q_acc_1_12,src_dof_12)),_mm256_mul_pd(q_acc_1_13,src_dof_13)),_mm256_mul_pd(q_acc_1_14,src_dof_14)),_mm256_mul_pd(q_acc_1_15,src_dof_15)),_mm256_mul_pd(q_acc_1_16,src_dof_16)),_mm256_mul_pd(q_acc_1_17,src_dof_17)),_mm256_mul_pd(q_acc_1_18,src_dof_18)),_mm256_mul_pd(q_acc_1_19,src_dof_19)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_20,src_dof_20)),_mm256_mul_pd(q_acc_1_21,src_dof_21)),_mm256_mul_pd(q_acc_1_22,src_dof_22)),_mm256_mul_pd(q_acc_1_23,src_dof_23)),_mm256_mul_pd(q_acc_1_24,src_dof_24)),_mm256_mul_pd(q_acc_1_25,src_dof_25)),_mm256_mul_pd(q_acc_1_26,src_dof_26)),_mm256_mul_pd(q_acc_1_27,src_dof_27)),_mm256_mul_pd(q_acc_1_28,src_dof_28)),_mm256_mul_pd(q_acc_1_29,src_dof_29)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)),_mm256_mul_pd(q_acc_1_6,src_dof_6)),_mm256_mul_pd(q_acc_1_7,src_dof_7)),_mm256_mul_pd(q_acc_1_8,src_dof_8)),_mm256_mul_pd(q_acc_1_9,src_dof_9)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_10,src_dof_10)),_mm256_mul_pd(q_acc_2_11,src_dof_11)),_mm256_mul_pd(q_acc_2_12,src_dof_12)),_mm256_mul_pd(q_acc_2_13,src_dof_13)),_mm256_mul_pd(q_acc_2_14,src_dof_14)),_mm256_mul_pd(q_acc_2_15,src_dof_15)),_mm256_mul_pd(q_acc_2_16,src_dof_16)),_mm256_mul_pd(q_acc_2_17,src_dof_17)),_mm256_mul_pd(q_acc_2_18,src_dof_18)),_mm256_mul_pd(q_acc_2_19,src_dof_19)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_20,src_dof_20)),_mm256_mul_pd(q_acc_2_21,src_dof_21)),_mm256_mul_pd(q_acc_2_22,src_dof_22)),_mm256_mul_pd(q_acc_2_23,src_dof_23)),_mm256_mul_pd(q_acc_2_24,src_dof_24)),_mm256_mul_pd(q_acc_2_25,src_dof_25)),_mm256_mul_pd(q_acc_2_26,src_dof_26)),_mm256_mul_pd(q_acc_2_27,src_dof_27)),_mm256_mul_pd(q_acc_2_28,src_dof_28)),_mm256_mul_pd(q_acc_2_29,src_dof_29)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)),_mm256_mul_pd(q_acc_2_6,src_dof_6)),_mm256_mul_pd(q_acc_2_7,src_dof_7)),_mm256_mul_pd(q_acc_2_8,src_dof_8)),_mm256_mul_pd(q_acc_2_9,src_dof_9)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_10,src_dof_10)),_mm256_mul_pd(q_acc_3_11,src_dof_11)),_mm256_mul_pd(q_acc_3_12,src_dof_12)),_mm256_mul_pd(q_acc_3_13,src_dof_13)),_mm256_mul_pd(q_acc_3_14,src_dof_14)),_mm256_mul_pd(q_acc_3_15,src_dof_15)),_mm256_mul_pd(q_acc_3_16,src_dof_16)),_mm256_mul_pd(q_acc_3_17,src_dof_17)),_mm256_mul_pd(q_acc_3_18,src_dof_18)),_mm256_mul_pd(q_acc_3_19,src_dof_19)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_20,src_dof_20)),_mm256_mul_pd(q_acc_3_21,src_dof_21)),_mm256_mul_pd(q_acc_3_22,src_dof_22)),_mm256_mul_pd(q_acc_3_23,src_dof_23)),_mm256_mul_pd(q_acc_3_24,src_dof_24)),_mm256_mul_pd(q_acc_3_25,src_dof_25)),_mm256_mul_pd(q_acc_3_26,src_dof_26)),_mm256_mul_pd(q_acc_3_27,src_dof_27)),_mm256_mul_pd(q_acc_3_28,src_dof_28)),_mm256_mul_pd(q_acc_3_29,src_dof_29)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)),_mm256_mul_pd(q_acc_3_6,src_dof_6)),_mm256_mul_pd(q_acc_3_7,src_dof_7)),_mm256_mul_pd(q_acc_3_8,src_dof_8)),_mm256_mul_pd(q_acc_3_9,src_dof_9)); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]))); + _mm256_storeu_pd(&_data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_3 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_4 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_5 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_6 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_7 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_8 = _data_src_edge_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_9 = _data_src_edge_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_10 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_11 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_12 = _data_src_vertex_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_13 = _data_src_vertex_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_14 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_15 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_16 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_17 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_18 = _data_src_edge_1[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_19 = _data_src_edge_1[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_20 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_21 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_22 = _data_src_vertex_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t src_dof_23 = _data_src_vertex_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t src_dof_24 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_25 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_26 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t src_dof_27 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t src_dof_28 = _data_src_edge_2[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t src_dof_29 = _data_src_edge_2[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + const real_t rho_dof_3 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + const real_t rho_dof_4 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + 4*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_5 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 5*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_6 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + 1]; + const real_t rho_dof_7 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 3*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + const real_t rho_dof_8 = _data_rhoEdge[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge - 1)*(micro_edges_per_macro_edge + 1)) / (6)) + 6*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge - 1)*(-ctr_2 + micro_edges_per_macro_edge + 1)) / (6))]; + const real_t rho_dof_9 = _data_rhoEdge[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_0_6 = 0.0; + real_t q_acc_0_7 = 0.0; + real_t q_acc_0_8 = 0.0; + real_t q_acc_0_9 = 0.0; + real_t q_acc_0_10 = 0.0; + real_t q_acc_0_11 = 0.0; + real_t q_acc_0_12 = 0.0; + real_t q_acc_0_13 = 0.0; + real_t q_acc_0_14 = 0.0; + real_t q_acc_0_15 = 0.0; + real_t q_acc_0_16 = 0.0; + real_t q_acc_0_17 = 0.0; + real_t q_acc_0_18 = 0.0; + real_t q_acc_0_19 = 0.0; + real_t q_acc_0_20 = 0.0; + real_t q_acc_0_21 = 0.0; + real_t q_acc_0_22 = 0.0; + real_t q_acc_0_23 = 0.0; + real_t q_acc_0_24 = 0.0; + real_t q_acc_0_25 = 0.0; + real_t q_acc_0_26 = 0.0; + real_t q_acc_0_27 = 0.0; + real_t q_acc_0_28 = 0.0; + real_t q_acc_0_29 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_1_6 = 0.0; + real_t q_acc_1_7 = 0.0; + real_t q_acc_1_8 = 0.0; + real_t q_acc_1_9 = 0.0; + real_t q_acc_1_10 = 0.0; + real_t q_acc_1_11 = 0.0; + real_t q_acc_1_12 = 0.0; + real_t q_acc_1_13 = 0.0; + real_t q_acc_1_14 = 0.0; + real_t q_acc_1_15 = 0.0; + real_t q_acc_1_16 = 0.0; + real_t q_acc_1_17 = 0.0; + real_t q_acc_1_18 = 0.0; + real_t q_acc_1_19 = 0.0; + real_t q_acc_1_20 = 0.0; + real_t q_acc_1_21 = 0.0; + real_t q_acc_1_22 = 0.0; + real_t q_acc_1_23 = 0.0; + real_t q_acc_1_24 = 0.0; + real_t q_acc_1_25 = 0.0; + real_t q_acc_1_26 = 0.0; + real_t q_acc_1_27 = 0.0; + real_t q_acc_1_28 = 0.0; + real_t q_acc_1_29 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_2_6 = 0.0; + real_t q_acc_2_7 = 0.0; + real_t q_acc_2_8 = 0.0; + real_t q_acc_2_9 = 0.0; + real_t q_acc_2_10 = 0.0; + real_t q_acc_2_11 = 0.0; + real_t q_acc_2_12 = 0.0; + real_t q_acc_2_13 = 0.0; + real_t q_acc_2_14 = 0.0; + real_t q_acc_2_15 = 0.0; + real_t q_acc_2_16 = 0.0; + real_t q_acc_2_17 = 0.0; + real_t q_acc_2_18 = 0.0; + real_t q_acc_2_19 = 0.0; + real_t q_acc_2_20 = 0.0; + real_t q_acc_2_21 = 0.0; + real_t q_acc_2_22 = 0.0; + real_t q_acc_2_23 = 0.0; + real_t q_acc_2_24 = 0.0; + real_t q_acc_2_25 = 0.0; + real_t q_acc_2_26 = 0.0; + real_t q_acc_2_27 = 0.0; + real_t q_acc_2_28 = 0.0; + real_t q_acc_2_29 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_3_6 = 0.0; + real_t q_acc_3_7 = 0.0; + real_t q_acc_3_8 = 0.0; + real_t q_acc_3_9 = 0.0; + real_t q_acc_3_10 = 0.0; + real_t q_acc_3_11 = 0.0; + real_t q_acc_3_12 = 0.0; + real_t q_acc_3_13 = 0.0; + real_t q_acc_3_14 = 0.0; + real_t q_acc_3_15 = 0.0; + real_t q_acc_3_16 = 0.0; + real_t q_acc_3_17 = 0.0; + real_t q_acc_3_18 = 0.0; + real_t q_acc_3_19 = 0.0; + real_t q_acc_3_20 = 0.0; + real_t q_acc_3_21 = 0.0; + real_t q_acc_3_22 = 0.0; + real_t q_acc_3_23 = 0.0; + real_t q_acc_3_24 = 0.0; + real_t q_acc_3_25 = 0.0; + real_t q_acc_3_26 = 0.0; + real_t q_acc_3_27 = 0.0; + real_t q_acc_3_28 = 0.0; + real_t q_acc_3_29 = 0.0; + for (int64_t q = 0; q < 5; q += 1) + { + const real_t tmp_qloop_0 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_1 = tmp_qloop_0*2.0; + const real_t tmp_qloop_2 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_3 = tmp_qloop_2*2.0; + const real_t tmp_qloop_4 = (_data_q_p_2[q]*_data_q_p_2[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_7 = tmp_qloop_6*_data_q_p_2[q]; + const real_t tmp_qloop_8 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_9 = tmp_qloop_8*_data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_8*_data_q_p_2[q]; + const real_t tmp_qloop_11 = tmp_qloop_10 + tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_1 + tmp_qloop_11 + tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_7 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] - 3.0*_data_q_p_2[q] + 1.0; + const real_t tmp_qloop_13 = 4.0*_data_q_p_2[q]; + const real_t tmp_qloop_14 = tmp_qloop_13 + tmp_qloop_6; + const real_t tmp_qloop_15 = -rho_dof_8*tmp_qloop_6; + const real_t tmp_qloop_16 = rho_dof_0*(tmp_qloop_14 + tmp_qloop_8 - 3.0); + const real_t tmp_qloop_17 = -rho_dof_7*tmp_qloop_13 + tmp_qloop_16; + const real_t tmp_qloop_18 = rho_dof_1*(tmp_qloop_8 - 1.0) + rho_dof_5*tmp_qloop_13 + rho_dof_6*tmp_qloop_6 + rho_dof_9*(-tmp_qloop_14 - 8.0*_data_q_p_0[q] + 4.0) + tmp_qloop_15 + tmp_qloop_17; + const real_t tmp_qloop_19 = tmp_qloop_8 - 4.0; + const real_t tmp_qloop_20 = -rho_dof_9*tmp_qloop_8; + const real_t tmp_qloop_21 = rho_dof_2*(tmp_qloop_6 - 1.0) + rho_dof_4*tmp_qloop_13 + rho_dof_6*tmp_qloop_8 + rho_dof_8*(-tmp_qloop_13 - tmp_qloop_19 - 8.0*_data_q_p_1[q]) + tmp_qloop_17 + tmp_qloop_20; + const real_t tmp_qloop_22 = rho_dof_3*(tmp_qloop_13 - 1.0) + rho_dof_4*tmp_qloop_6 + rho_dof_5*tmp_qloop_8 + rho_dof_7*(-tmp_qloop_19 - tmp_qloop_6 - 8.0*_data_q_p_2[q]) + tmp_qloop_15 + tmp_qloop_16 + tmp_qloop_20; + const real_t tmp_qloop_23 = jac_affine_inv_0_0_GREEN_DOWN*tmp_qloop_18 + jac_affine_inv_1_0_GREEN_DOWN*tmp_qloop_21 + jac_affine_inv_2_0_GREEN_DOWN*tmp_qloop_22; + const real_t tmp_qloop_24 = tmp_qloop_1 - _data_q_p_0[q]; + const real_t tmp_qloop_25 = tmp_qloop_3 - _data_q_p_1[q]; + const real_t tmp_qloop_26 = tmp_qloop_5 - _data_q_p_2[q]; + const real_t tmp_qloop_27 = -tmp_qloop_10 + tmp_qloop_13 + tmp_qloop_4*-4.0 - tmp_qloop_7; + const real_t tmp_qloop_28 = tmp_qloop_2*4.0; + const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; + const real_t tmp_qloop_30 = tmp_qloop_0*4.0; + const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; + const real_t tmp_qloop_32 = abs_det_jac_affine_GREEN_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; + const real_t tmp_qloop_35 = jac_affine_inv_0_1_GREEN_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_GREEN_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_GREEN_DOWN*tmp_qloop_22; + const real_t tmp_qloop_36 = tmp_qloop_33*tmp_qloop_35; + const real_t tmp_qloop_37 = jac_affine_inv_0_2_GREEN_DOWN*tmp_qloop_18 + jac_affine_inv_1_2_GREEN_DOWN*tmp_qloop_21 + jac_affine_inv_2_2_GREEN_DOWN*tmp_qloop_22; + const real_t tmp_qloop_38 = tmp_qloop_33*tmp_qloop_37; + const real_t tmp_qloop_39 = tmp_qloop_32*_data_q_p_0[q]; + const real_t tmp_qloop_40 = tmp_qloop_23*tmp_qloop_39; + const real_t tmp_qloop_41 = tmp_qloop_32*_data_q_p_2[q]; + const real_t tmp_qloop_42 = tmp_qloop_23*tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_42*tmp_qloop_9; + const real_t tmp_qloop_44 = tmp_qloop_32*_data_q_p_1[q]; + const real_t tmp_qloop_45 = tmp_qloop_23*tmp_qloop_44; + const real_t tmp_qloop_46 = tmp_qloop_35*tmp_qloop_39; + const real_t tmp_qloop_47 = tmp_qloop_35*tmp_qloop_41; + const real_t tmp_qloop_48 = tmp_qloop_47*tmp_qloop_9; + const real_t tmp_qloop_49 = tmp_qloop_35*tmp_qloop_44; + const real_t tmp_qloop_50 = tmp_qloop_37*tmp_qloop_39; + const real_t tmp_qloop_51 = tmp_qloop_37*tmp_qloop_41; + const real_t tmp_qloop_52 = tmp_qloop_51*tmp_qloop_9; + const real_t tmp_qloop_53 = tmp_qloop_37*tmp_qloop_44; + const real_t tmp_qloop_54 = tmp_qloop_23*tmp_qloop_32; + const real_t tmp_qloop_55 = tmp_qloop_2*tmp_qloop_8; + const real_t tmp_qloop_56 = tmp_qloop_32*tmp_qloop_55; + const real_t tmp_qloop_57 = tmp_qloop_4*tmp_qloop_54; + const real_t tmp_qloop_58 = tmp_qloop_32*tmp_qloop_4; + const real_t tmp_qloop_59 = tmp_qloop_35*tmp_qloop_58; + const real_t tmp_qloop_60 = tmp_qloop_37*tmp_qloop_58; + const real_t q_tmp_0_0 = tmp_qloop_12*tmp_qloop_34; + const real_t q_tmp_0_1 = tmp_qloop_24*tmp_qloop_34; + const real_t q_tmp_0_2 = tmp_qloop_25*tmp_qloop_34; + const real_t q_tmp_0_3 = tmp_qloop_26*tmp_qloop_34; + const real_t q_tmp_0_4 = tmp_qloop_34*tmp_qloop_7; + const real_t q_tmp_0_5 = tmp_qloop_10*tmp_qloop_34; + const real_t q_tmp_0_6 = tmp_qloop_34*tmp_qloop_9; + const real_t q_tmp_0_7 = tmp_qloop_27*tmp_qloop_34; + const real_t q_tmp_0_8 = tmp_qloop_29*tmp_qloop_34; + const real_t q_tmp_0_9 = tmp_qloop_31*tmp_qloop_34; + const real_t q_tmp_0_10 = tmp_qloop_12*tmp_qloop_36; + const real_t q_tmp_0_11 = tmp_qloop_24*tmp_qloop_36; + const real_t q_tmp_0_12 = tmp_qloop_25*tmp_qloop_36; + const real_t q_tmp_0_13 = tmp_qloop_26*tmp_qloop_36; + const real_t q_tmp_0_14 = tmp_qloop_36*tmp_qloop_7; + const real_t q_tmp_0_15 = tmp_qloop_10*tmp_qloop_36; + const real_t q_tmp_0_16 = tmp_qloop_36*tmp_qloop_9; + const real_t q_tmp_0_17 = tmp_qloop_27*tmp_qloop_36; + const real_t q_tmp_0_18 = tmp_qloop_29*tmp_qloop_36; + const real_t q_tmp_0_19 = tmp_qloop_31*tmp_qloop_36; + const real_t q_tmp_0_20 = tmp_qloop_12*tmp_qloop_38; + const real_t q_tmp_0_21 = tmp_qloop_24*tmp_qloop_38; + const real_t q_tmp_0_22 = tmp_qloop_25*tmp_qloop_38; + const real_t q_tmp_0_23 = tmp_qloop_26*tmp_qloop_38; + const real_t q_tmp_0_24 = tmp_qloop_38*tmp_qloop_7; + const real_t q_tmp_0_25 = tmp_qloop_10*tmp_qloop_38; + const real_t q_tmp_0_26 = tmp_qloop_38*tmp_qloop_9; + const real_t q_tmp_0_27 = tmp_qloop_27*tmp_qloop_38; + const real_t q_tmp_0_28 = tmp_qloop_29*tmp_qloop_38; + const real_t q_tmp_0_29 = tmp_qloop_31*tmp_qloop_38; + const real_t q_tmp_1_0 = tmp_qloop_12*tmp_qloop_40; + const real_t q_tmp_1_1 = tmp_qloop_24*tmp_qloop_40; + const real_t q_tmp_1_2 = tmp_qloop_25*tmp_qloop_40; + const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_40; + const real_t q_tmp_1_4 = tmp_qloop_43; + const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_42; + const real_t q_tmp_1_6 = tmp_qloop_30*tmp_qloop_45; + const real_t q_tmp_1_7 = tmp_qloop_27*tmp_qloop_40; + const real_t q_tmp_1_8 = tmp_qloop_29*tmp_qloop_40; + const real_t q_tmp_1_9 = tmp_qloop_31*tmp_qloop_40; + const real_t q_tmp_1_10 = tmp_qloop_12*tmp_qloop_46; + const real_t q_tmp_1_11 = tmp_qloop_24*tmp_qloop_46; + const real_t q_tmp_1_12 = tmp_qloop_25*tmp_qloop_46; + const real_t q_tmp_1_13 = tmp_qloop_26*tmp_qloop_46; + const real_t q_tmp_1_14 = tmp_qloop_48; + const real_t q_tmp_1_15 = tmp_qloop_30*tmp_qloop_47; + const real_t q_tmp_1_16 = tmp_qloop_30*tmp_qloop_49; + const real_t q_tmp_1_17 = tmp_qloop_27*tmp_qloop_46; + const real_t q_tmp_1_18 = tmp_qloop_29*tmp_qloop_46; + const real_t q_tmp_1_19 = tmp_qloop_31*tmp_qloop_46; + const real_t q_tmp_1_20 = tmp_qloop_12*tmp_qloop_50; + const real_t q_tmp_1_21 = tmp_qloop_24*tmp_qloop_50; + const real_t q_tmp_1_22 = tmp_qloop_25*tmp_qloop_50; + const real_t q_tmp_1_23 = tmp_qloop_26*tmp_qloop_50; + const real_t q_tmp_1_24 = tmp_qloop_52; + const real_t q_tmp_1_25 = tmp_qloop_30*tmp_qloop_51; + const real_t q_tmp_1_26 = tmp_qloop_30*tmp_qloop_53; + const real_t q_tmp_1_27 = tmp_qloop_27*tmp_qloop_50; + const real_t q_tmp_1_28 = tmp_qloop_29*tmp_qloop_50; + const real_t q_tmp_1_29 = tmp_qloop_31*tmp_qloop_50; + const real_t q_tmp_2_0 = tmp_qloop_12*tmp_qloop_45; + const real_t q_tmp_2_1 = tmp_qloop_24*tmp_qloop_45; + const real_t q_tmp_2_2 = tmp_qloop_25*tmp_qloop_45; + const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_45; + const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_42; + const real_t q_tmp_2_5 = tmp_qloop_43; + const real_t q_tmp_2_6 = tmp_qloop_54*tmp_qloop_55; + const real_t q_tmp_2_7 = tmp_qloop_27*tmp_qloop_45; + const real_t q_tmp_2_8 = tmp_qloop_29*tmp_qloop_45; + const real_t q_tmp_2_9 = tmp_qloop_31*tmp_qloop_45; + const real_t q_tmp_2_10 = tmp_qloop_12*tmp_qloop_49; + const real_t q_tmp_2_11 = tmp_qloop_24*tmp_qloop_49; + const real_t q_tmp_2_12 = tmp_qloop_25*tmp_qloop_49; + const real_t q_tmp_2_13 = tmp_qloop_26*tmp_qloop_49; + const real_t q_tmp_2_14 = tmp_qloop_28*tmp_qloop_47; + const real_t q_tmp_2_15 = tmp_qloop_48; + const real_t q_tmp_2_16 = tmp_qloop_35*tmp_qloop_56; + const real_t q_tmp_2_17 = tmp_qloop_27*tmp_qloop_49; + const real_t q_tmp_2_18 = tmp_qloop_29*tmp_qloop_49; + const real_t q_tmp_2_19 = tmp_qloop_31*tmp_qloop_49; + const real_t q_tmp_2_20 = tmp_qloop_12*tmp_qloop_53; + const real_t q_tmp_2_21 = tmp_qloop_24*tmp_qloop_53; + const real_t q_tmp_2_22 = tmp_qloop_25*tmp_qloop_53; + const real_t q_tmp_2_23 = tmp_qloop_26*tmp_qloop_53; + const real_t q_tmp_2_24 = tmp_qloop_28*tmp_qloop_51; + const real_t q_tmp_2_25 = tmp_qloop_52; + const real_t q_tmp_2_26 = tmp_qloop_37*tmp_qloop_56; + const real_t q_tmp_2_27 = tmp_qloop_27*tmp_qloop_53; + const real_t q_tmp_2_28 = tmp_qloop_29*tmp_qloop_53; + const real_t q_tmp_2_29 = tmp_qloop_31*tmp_qloop_53; + const real_t q_tmp_3_0 = tmp_qloop_12*tmp_qloop_42; + const real_t q_tmp_3_1 = tmp_qloop_24*tmp_qloop_42; + const real_t q_tmp_3_2 = tmp_qloop_25*tmp_qloop_42; + const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_42; + const real_t q_tmp_3_4 = tmp_qloop_57*tmp_qloop_6; + const real_t q_tmp_3_5 = tmp_qloop_57*tmp_qloop_8; + const real_t q_tmp_3_6 = tmp_qloop_43; + const real_t q_tmp_3_7 = tmp_qloop_27*tmp_qloop_42; + const real_t q_tmp_3_8 = tmp_qloop_29*tmp_qloop_42; + const real_t q_tmp_3_9 = tmp_qloop_31*tmp_qloop_42; + const real_t q_tmp_3_10 = tmp_qloop_12*tmp_qloop_47; + const real_t q_tmp_3_11 = tmp_qloop_24*tmp_qloop_47; + const real_t q_tmp_3_12 = tmp_qloop_25*tmp_qloop_47; + const real_t q_tmp_3_13 = tmp_qloop_26*tmp_qloop_47; + const real_t q_tmp_3_14 = tmp_qloop_59*tmp_qloop_6; + const real_t q_tmp_3_15 = tmp_qloop_59*tmp_qloop_8; + const real_t q_tmp_3_16 = tmp_qloop_48; + const real_t q_tmp_3_17 = tmp_qloop_27*tmp_qloop_47; + const real_t q_tmp_3_18 = tmp_qloop_29*tmp_qloop_47; + const real_t q_tmp_3_19 = tmp_qloop_31*tmp_qloop_47; + const real_t q_tmp_3_20 = tmp_qloop_12*tmp_qloop_51; + const real_t q_tmp_3_21 = tmp_qloop_24*tmp_qloop_51; + const real_t q_tmp_3_22 = tmp_qloop_25*tmp_qloop_51; + const real_t q_tmp_3_23 = tmp_qloop_26*tmp_qloop_51; + const real_t q_tmp_3_24 = tmp_qloop_6*tmp_qloop_60; + const real_t q_tmp_3_25 = tmp_qloop_60*tmp_qloop_8; + const real_t q_tmp_3_26 = tmp_qloop_52; + const real_t q_tmp_3_27 = tmp_qloop_27*tmp_qloop_51; + const real_t q_tmp_3_28 = tmp_qloop_29*tmp_qloop_51; + const real_t q_tmp_3_29 = tmp_qloop_31*tmp_qloop_51; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_0_6 = q_acc_0_6 + q_tmp_0_6; + q_acc_0_7 = q_acc_0_7 + q_tmp_0_7; + q_acc_0_8 = q_acc_0_8 + q_tmp_0_8; + q_acc_0_9 = q_acc_0_9 + q_tmp_0_9; + q_acc_0_10 = q_acc_0_10 + q_tmp_0_10; + q_acc_0_11 = q_acc_0_11 + q_tmp_0_11; + q_acc_0_12 = q_acc_0_12 + q_tmp_0_12; + q_acc_0_13 = q_acc_0_13 + q_tmp_0_13; + q_acc_0_14 = q_acc_0_14 + q_tmp_0_14; + q_acc_0_15 = q_acc_0_15 + q_tmp_0_15; + q_acc_0_16 = q_acc_0_16 + q_tmp_0_16; + q_acc_0_17 = q_acc_0_17 + q_tmp_0_17; + q_acc_0_18 = q_acc_0_18 + q_tmp_0_18; + q_acc_0_19 = q_acc_0_19 + q_tmp_0_19; + q_acc_0_20 = q_acc_0_20 + q_tmp_0_20; + q_acc_0_21 = q_acc_0_21 + q_tmp_0_21; + q_acc_0_22 = q_acc_0_22 + q_tmp_0_22; + q_acc_0_23 = q_acc_0_23 + q_tmp_0_23; + q_acc_0_24 = q_acc_0_24 + q_tmp_0_24; + q_acc_0_25 = q_acc_0_25 + q_tmp_0_25; + q_acc_0_26 = q_acc_0_26 + q_tmp_0_26; + q_acc_0_27 = q_acc_0_27 + q_tmp_0_27; + q_acc_0_28 = q_acc_0_28 + q_tmp_0_28; + q_acc_0_29 = q_acc_0_29 + q_tmp_0_29; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_1_6 = q_acc_1_6 + q_tmp_1_6; + q_acc_1_7 = q_acc_1_7 + q_tmp_1_7; + q_acc_1_8 = q_acc_1_8 + q_tmp_1_8; + q_acc_1_9 = q_acc_1_9 + q_tmp_1_9; + q_acc_1_10 = q_acc_1_10 + q_tmp_1_10; + q_acc_1_11 = q_acc_1_11 + q_tmp_1_11; + q_acc_1_12 = q_acc_1_12 + q_tmp_1_12; + q_acc_1_13 = q_acc_1_13 + q_tmp_1_13; + q_acc_1_14 = q_acc_1_14 + q_tmp_1_14; + q_acc_1_15 = q_acc_1_15 + q_tmp_1_15; + q_acc_1_16 = q_acc_1_16 + q_tmp_1_16; + q_acc_1_17 = q_acc_1_17 + q_tmp_1_17; + q_acc_1_18 = q_acc_1_18 + q_tmp_1_18; + q_acc_1_19 = q_acc_1_19 + q_tmp_1_19; + q_acc_1_20 = q_acc_1_20 + q_tmp_1_20; + q_acc_1_21 = q_acc_1_21 + q_tmp_1_21; + q_acc_1_22 = q_acc_1_22 + q_tmp_1_22; + q_acc_1_23 = q_acc_1_23 + q_tmp_1_23; + q_acc_1_24 = q_acc_1_24 + q_tmp_1_24; + q_acc_1_25 = q_acc_1_25 + q_tmp_1_25; + q_acc_1_26 = q_acc_1_26 + q_tmp_1_26; + q_acc_1_27 = q_acc_1_27 + q_tmp_1_27; + q_acc_1_28 = q_acc_1_28 + q_tmp_1_28; + q_acc_1_29 = q_acc_1_29 + q_tmp_1_29; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_2_6 = q_acc_2_6 + q_tmp_2_6; + q_acc_2_7 = q_acc_2_7 + q_tmp_2_7; + q_acc_2_8 = q_acc_2_8 + q_tmp_2_8; + q_acc_2_9 = q_acc_2_9 + q_tmp_2_9; + q_acc_2_10 = q_acc_2_10 + q_tmp_2_10; + q_acc_2_11 = q_acc_2_11 + q_tmp_2_11; + q_acc_2_12 = q_acc_2_12 + q_tmp_2_12; + q_acc_2_13 = q_acc_2_13 + q_tmp_2_13; + q_acc_2_14 = q_acc_2_14 + q_tmp_2_14; + q_acc_2_15 = q_acc_2_15 + q_tmp_2_15; + q_acc_2_16 = q_acc_2_16 + q_tmp_2_16; + q_acc_2_17 = q_acc_2_17 + q_tmp_2_17; + q_acc_2_18 = q_acc_2_18 + q_tmp_2_18; + q_acc_2_19 = q_acc_2_19 + q_tmp_2_19; + q_acc_2_20 = q_acc_2_20 + q_tmp_2_20; + q_acc_2_21 = q_acc_2_21 + q_tmp_2_21; + q_acc_2_22 = q_acc_2_22 + q_tmp_2_22; + q_acc_2_23 = q_acc_2_23 + q_tmp_2_23; + q_acc_2_24 = q_acc_2_24 + q_tmp_2_24; + q_acc_2_25 = q_acc_2_25 + q_tmp_2_25; + q_acc_2_26 = q_acc_2_26 + q_tmp_2_26; + q_acc_2_27 = q_acc_2_27 + q_tmp_2_27; + q_acc_2_28 = q_acc_2_28 + q_tmp_2_28; + q_acc_2_29 = q_acc_2_29 + q_tmp_2_29; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_3_6 = q_acc_3_6 + q_tmp_3_6; + q_acc_3_7 = q_acc_3_7 + q_tmp_3_7; + q_acc_3_8 = q_acc_3_8 + q_tmp_3_8; + q_acc_3_9 = q_acc_3_9 + q_tmp_3_9; + q_acc_3_10 = q_acc_3_10 + q_tmp_3_10; + q_acc_3_11 = q_acc_3_11 + q_tmp_3_11; + q_acc_3_12 = q_acc_3_12 + q_tmp_3_12; + q_acc_3_13 = q_acc_3_13 + q_tmp_3_13; + q_acc_3_14 = q_acc_3_14 + q_tmp_3_14; + q_acc_3_15 = q_acc_3_15 + q_tmp_3_15; + q_acc_3_16 = q_acc_3_16 + q_tmp_3_16; + q_acc_3_17 = q_acc_3_17 + q_tmp_3_17; + q_acc_3_18 = q_acc_3_18 + q_tmp_3_18; + q_acc_3_19 = q_acc_3_19 + q_tmp_3_19; + q_acc_3_20 = q_acc_3_20 + q_tmp_3_20; + q_acc_3_21 = q_acc_3_21 + q_tmp_3_21; + q_acc_3_22 = q_acc_3_22 + q_tmp_3_22; + q_acc_3_23 = q_acc_3_23 + q_tmp_3_23; + q_acc_3_24 = q_acc_3_24 + q_tmp_3_24; + q_acc_3_25 = q_acc_3_25 + q_tmp_3_25; + q_acc_3_26 = q_acc_3_26 + q_tmp_3_26; + q_acc_3_27 = q_acc_3_27 + q_tmp_3_27; + q_acc_3_28 = q_acc_3_28 + q_tmp_3_28; + q_acc_3_29 = q_acc_3_29 + q_tmp_3_29; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_10*src_dof_10 + q_acc_0_11*src_dof_11 + q_acc_0_12*src_dof_12 + q_acc_0_13*src_dof_13 + q_acc_0_14*src_dof_14 + q_acc_0_15*src_dof_15 + q_acc_0_16*src_dof_16 + q_acc_0_17*src_dof_17 + q_acc_0_18*src_dof_18 + q_acc_0_19*src_dof_19 + q_acc_0_2*src_dof_2 + q_acc_0_20*src_dof_20 + q_acc_0_21*src_dof_21 + q_acc_0_22*src_dof_22 + q_acc_0_23*src_dof_23 + q_acc_0_24*src_dof_24 + q_acc_0_25*src_dof_25 + q_acc_0_26*src_dof_26 + q_acc_0_27*src_dof_27 + q_acc_0_28*src_dof_28 + q_acc_0_29*src_dof_29 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5 + q_acc_0_6*src_dof_6 + q_acc_0_7*src_dof_7 + q_acc_0_8*src_dof_8 + q_acc_0_9*src_dof_9; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_10*src_dof_10 + q_acc_1_11*src_dof_11 + q_acc_1_12*src_dof_12 + q_acc_1_13*src_dof_13 + q_acc_1_14*src_dof_14 + q_acc_1_15*src_dof_15 + q_acc_1_16*src_dof_16 + q_acc_1_17*src_dof_17 + q_acc_1_18*src_dof_18 + q_acc_1_19*src_dof_19 + q_acc_1_2*src_dof_2 + q_acc_1_20*src_dof_20 + q_acc_1_21*src_dof_21 + q_acc_1_22*src_dof_22 + q_acc_1_23*src_dof_23 + q_acc_1_24*src_dof_24 + q_acc_1_25*src_dof_25 + q_acc_1_26*src_dof_26 + q_acc_1_27*src_dof_27 + q_acc_1_28*src_dof_28 + q_acc_1_29*src_dof_29 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5 + q_acc_1_6*src_dof_6 + q_acc_1_7*src_dof_7 + q_acc_1_8*src_dof_8 + q_acc_1_9*src_dof_9; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_10*src_dof_10 + q_acc_2_11*src_dof_11 + q_acc_2_12*src_dof_12 + q_acc_2_13*src_dof_13 + q_acc_2_14*src_dof_14 + q_acc_2_15*src_dof_15 + q_acc_2_16*src_dof_16 + q_acc_2_17*src_dof_17 + q_acc_2_18*src_dof_18 + q_acc_2_19*src_dof_19 + q_acc_2_2*src_dof_2 + q_acc_2_20*src_dof_20 + q_acc_2_21*src_dof_21 + q_acc_2_22*src_dof_22 + q_acc_2_23*src_dof_23 + q_acc_2_24*src_dof_24 + q_acc_2_25*src_dof_25 + q_acc_2_26*src_dof_26 + q_acc_2_27*src_dof_27 + q_acc_2_28*src_dof_28 + q_acc_2_29*src_dof_29 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5 + q_acc_2_6*src_dof_6 + q_acc_2_7*src_dof_7 + q_acc_2_8*src_dof_8 + q_acc_2_9*src_dof_9; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_10*src_dof_10 + q_acc_3_11*src_dof_11 + q_acc_3_12*src_dof_12 + q_acc_3_13*src_dof_13 + q_acc_3_14*src_dof_14 + q_acc_3_15*src_dof_15 + q_acc_3_16*src_dof_16 + q_acc_3_17*src_dof_17 + q_acc_3_18*src_dof_18 + q_acc_3_19*src_dof_19 + q_acc_3_2*src_dof_2 + q_acc_3_20*src_dof_20 + q_acc_3_21*src_dof_21 + q_acc_3_22*src_dof_22 + q_acc_3_23*src_dof_23 + q_acc_3_24*src_dof_24 + q_acc_3_25*src_dof_25 + q_acc_3_26*src_dof_26 + q_acc_3_27*src_dof_27 + q_acc_3_28*src_dof_28 + q_acc_3_29*src_dof_29 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5 + q_acc_3_6*src_dof_6 + q_acc_3_7*src_dof_7 + q_acc_3_8*src_dof_8 + q_acc_3_9*src_dof_9; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_0 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_1 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_2 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; + } + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_macro_2D.cpp b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp similarity index 78% rename from operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_macro_2D.cpp rename to operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp index 918e7cb3..8e09db72 100644 --- a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_macro_2D.cpp +++ b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp @@ -47,6 +47,10 @@ + + + + @@ -60,7 +64,7 @@ namespace hyteg { namespace operatorgeneration { -void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, real_t * RESTRICT _data_src_edge_0, real_t * RESTRICT _data_src_edge_1, real_t * RESTRICT _data_src_vertex_0, real_t * RESTRICT _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, real_t * RESTRICT _data_src_edge_0, real_t * RESTRICT _data_src_edge_1, real_t * RESTRICT _data_src_vertex_0, real_t * RESTRICT _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; @@ -69,33 +73,55 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t * const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; + const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_8 = -tmp_qloop_7; + const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); + const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; + const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_11*1.0; { /* FaceType.GRAY */ - const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; - const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; - const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; - const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; - const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; - const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; - const real_t tmp_coords_jac_1_GRAY = 1.0 / (jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY); - const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_1_GRAY; - const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_1_GRAY; - const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_1_GRAY; - const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_1_GRAY; for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; @@ -153,18 +179,11 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t * for (int64_t q = 0; q < 4; q += 1) { const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; - const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); - const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_8 = -tmp_qloop_7; - const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); - const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; - const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; - const real_t tmp_qloop_12 = tmp_qloop_11*1.0; const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); @@ -203,11 +222,6 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t * const real_t tmp_qloop_51 = -tmp_qloop_38 + tmp_qloop_41*-4.0 + tmp_qloop_44; const real_t tmp_qloop_52 = tmp_qloop_39*4.0; const real_t tmp_qloop_53 = tmp_qloop_37 - tmp_qloop_38 - tmp_qloop_52; - const real_t tmp_qloop_54 = 1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q]; - const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); - const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q]; - const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q]; - const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54; const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; @@ -215,6 +229,11 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t * const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_54 = abs_det_jac_affine_GRAY*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q]; + const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); + const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q]; + const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q]; + const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54; const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; @@ -316,37 +335,52 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t * _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; } } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); { /* FaceType.BLUE */ - const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; - const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; - const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; - const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; - const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; - const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; - const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; - const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; - const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; - const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; - const real_t tmp_coords_jac_5_BLUE = 1.0 / (jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE); - const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_5_BLUE; - const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_5_BLUE; - const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_5_BLUE; - const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_5_BLUE; for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; @@ -404,18 +438,11 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t * for (int64_t q = 0; q < 4; q += 1) { const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; - const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); - const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_8 = -tmp_qloop_7; - const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); - const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; - const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; - const real_t tmp_qloop_12 = tmp_qloop_11*1.0; const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); @@ -454,11 +481,6 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t * const real_t tmp_qloop_51 = -tmp_qloop_38 + tmp_qloop_41*-4.0 + tmp_qloop_44; const real_t tmp_qloop_52 = tmp_qloop_39*4.0; const real_t tmp_qloop_53 = tmp_qloop_37 - tmp_qloop_38 - tmp_qloop_52; - const real_t tmp_qloop_54 = 1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q]; - const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); - const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q]; - const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q]; - const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54; const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; @@ -466,6 +488,11 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::apply_macro_2D( real_t * const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_54 = abs_det_jac_affine_BLUE*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q]; + const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); + const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q]; + const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q]; + const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54; const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; diff --git a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_macro_2D.cpp b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp similarity index 80% rename from operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_macro_2D.cpp rename to operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp index 832f7305..e9465ae2 100644 --- a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_macro_2D.cpp +++ b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D.cpp @@ -47,6 +47,10 @@ + + + + @@ -60,7 +64,7 @@ namespace hyteg { namespace operatorgeneration { -void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, idx_t * RESTRICT _data_src_edge_0, idx_t * RESTRICT _data_src_edge_1, idx_t * RESTRICT _data_src_vertex_0, idx_t * RESTRICT _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, idx_t * RESTRICT _data_src_edge_0, idx_t * RESTRICT _data_src_edge_1, idx_t * RESTRICT _data_src_vertex_0, idx_t * RESTRICT _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; @@ -69,33 +73,55 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; + const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_8 = -tmp_qloop_7; + const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); + const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; + const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_11*1.0; { /* FaceType.GRAY */ - const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; - const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; - const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; - const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; - const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; - const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; - const real_t tmp_coords_jac_1_GRAY = 1.0 / (jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY); - const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_1_GRAY; - const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_1_GRAY; - const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_1_GRAY; - const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_1_GRAY; for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; @@ -141,18 +167,11 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t for (int64_t q = 0; q < 4; q += 1) { const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; - const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); - const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_8 = -tmp_qloop_7; - const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); - const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; - const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; - const real_t tmp_qloop_12 = tmp_qloop_11*1.0; const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); @@ -191,11 +210,6 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t const real_t tmp_qloop_51 = -tmp_qloop_38 + tmp_qloop_41*-4.0 + tmp_qloop_44; const real_t tmp_qloop_52 = tmp_qloop_39*4.0; const real_t tmp_qloop_53 = tmp_qloop_37 - tmp_qloop_38 - tmp_qloop_52; - const real_t tmp_qloop_54 = 1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q]; - const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); - const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q]; - const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q]; - const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54; const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; @@ -203,6 +217,11 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_54 = abs_det_jac_affine_GRAY*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q]; + const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); + const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q]; + const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q]; + const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54; const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; @@ -398,37 +417,52 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); } } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); { /* FaceType.BLUE */ - const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; - const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; - const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; - const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; - const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; - const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; - const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; - const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; - const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; - const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; - const real_t tmp_coords_jac_5_BLUE = 1.0 / (jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE); - const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_5_BLUE; - const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_5_BLUE; - const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_5_BLUE; - const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_5_BLUE; for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; @@ -474,18 +508,11 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t for (int64_t q = 0; q < 4; q += 1) { const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; - const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); - const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_8 = -tmp_qloop_7; - const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); - const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; - const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; - const real_t tmp_qloop_12 = tmp_qloop_11*1.0; const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); @@ -524,11 +551,6 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t const real_t tmp_qloop_51 = -tmp_qloop_38 + tmp_qloop_41*-4.0 + tmp_qloop_44; const real_t tmp_qloop_52 = tmp_qloop_39*4.0; const real_t tmp_qloop_53 = tmp_qloop_37 - tmp_qloop_38 - tmp_qloop_52; - const real_t tmp_qloop_54 = 1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q]; - const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); - const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q]; - const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q]; - const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54; const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; @@ -536,6 +558,11 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUAnnulusMap::toMatrix_macro_2D( idx_t const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_54 = abs_det_jac_affine_BLUE*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_43 + rho_dof_1*tmp_qloop_49 + rho_dof_2*tmp_qloop_50 + rho_dof_3*tmp_qloop_38 + rho_dof_4*tmp_qloop_51 + rho_dof_5*tmp_qloop_53)*_data_q_w[q]; + const real_t tmp_qloop_55 = tmp_qloop_54*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); + const real_t tmp_qloop_59 = tmp_qloop_54*_data_q_p_0[q]; + const real_t tmp_qloop_61 = tmp_qloop_54*_data_q_p_1[q]; + const real_t tmp_qloop_65 = tmp_qloop_37*tmp_qloop_41*tmp_qloop_54; const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; diff --git a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_macro_3D.cpp b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp similarity index 82% rename from operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_macro_3D.cpp rename to operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp index 77f66b1a..dc411f79 100644 --- a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp @@ -47,6 +47,10 @@ + + + + @@ -60,7 +64,7 @@ namespace hyteg { namespace operatorgeneration { -void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, real_t * RESTRICT _data_src_edge_0, real_t * RESTRICT _data_src_edge_1, real_t * RESTRICT _data_src_edge_2, real_t * RESTRICT _data_src_vertex_0, real_t * RESTRICT _data_src_vertex_1, real_t * RESTRICT _data_src_vertex_2, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, real_t * RESTRICT _data_src_edge_0, real_t * RESTRICT _data_src_edge_1, real_t * RESTRICT _data_src_edge_2, real_t * RESTRICT _data_src_vertex_0, real_t * RESTRICT _data_src_vertex_1, real_t * RESTRICT _data_src_vertex_2, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; @@ -71,61 +75,110 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t _data_q_p_2 [] = {0.25, 0.5, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; + const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2; + const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP; + const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP; + const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP; + const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP; + const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP; + const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP; + const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP; + const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP; + const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP; + const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP; + const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP; + const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP; + const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP; + const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP; + const real_t tmp_coords_jac_6_WHITE_UP = jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP; + const real_t tmp_coords_jac_7_WHITE_UP = 1.0 / (tmp_coords_jac_6_WHITE_UP); + const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP); + const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP); + const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP); + const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP); + const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP); + const real_t abs_det_jac_affine_WHITE_UP = abs(tmp_coords_jac_6_WHITE_UP); + const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; + const real_t tmp_qloop_1 = -rayVertex_0; + const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; + const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; + const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; + const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; + const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; + const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; + const real_t tmp_qloop_8 = -rayVertex_1; + const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; + const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; + const real_t tmp_qloop_12 = -rayVertex_2; + const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; + const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; + const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; + const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; + const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); + const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; + const real_t tmp_qloop_21 = radRayVertex - radRefVertex; + const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; + const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; + const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; + const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; + const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; + const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; + const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; + const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; + const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; { /* CellType.WHITE_UP */ - const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0; - const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1; - const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2; - const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP; - const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP; - const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP; - const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP; - const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP; - const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP; - const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP; - const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP; - const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP; - const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP; - const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP; - const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP; - const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP; - const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP; - const real_t tmp_coords_jac_6_WHITE_UP = 1.0 / (jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP); - const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP); - const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP); - const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP); - const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP); - const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; @@ -288,28 +341,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( real_t q_acc_3_29 = 0.0; for (int64_t q = 0; q < 5; q += 1) { - const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; - const real_t tmp_qloop_1 = -rayVertex_0; - const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; - const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; - const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; - const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; - const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; - const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; - const real_t tmp_qloop_8 = -rayVertex_1; - const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; - const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; - const real_t tmp_qloop_12 = -rayVertex_2; - const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; - const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; - const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; - const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; - const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); - const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; - const real_t tmp_qloop_21 = radRayVertex - radRefVertex; const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; @@ -320,28 +353,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; - const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; - const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); const real_t tmp_qloop_38 = tmp_qloop_37*1.0; - const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; - const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; - const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; - const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_47 = -tmp_qloop_28; const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; - const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; @@ -349,11 +375,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; - const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_69 = tmp_qloop_18*2.0; const real_t tmp_qloop_70 = -tmp_qloop_41; const real_t tmp_qloop_71 = tmp_qloop_35*2.0; - const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; @@ -415,14 +439,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; const real_t tmp_qloop_133 = tmp_qloop_103*4.0; const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; - const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; - const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); - const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; - const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; - const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; - const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; - const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; @@ -440,6 +457,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_WHITE_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); @@ -749,73 +773,93 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; } } + const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; + const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; + const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; + const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN; + const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN; + const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN; + const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; + const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; + const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; + const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN; + const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN; + const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN; + const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN; + const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN; + const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN; + const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN; + const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN; + const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN; + const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN; + const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN; + const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN; + const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN; + const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN; + const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN; + const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN; + const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN; + const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_18_WHITE_DOWN = jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN; + const real_t tmp_coords_jac_19_WHITE_DOWN = 1.0 / (tmp_coords_jac_18_WHITE_DOWN); + const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN); + const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN); + const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN); + const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); + const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); + const real_t abs_det_jac_affine_WHITE_DOWN = abs(tmp_coords_jac_18_WHITE_DOWN); { /* CellType.WHITE_DOWN */ - const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; - const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; - const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; - const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN; - const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN; - const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN; - const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; - const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; - const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; - const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN; - const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN; - const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN; - const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN; - const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN; - const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN; - const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN; - const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN; - const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN; - const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN; - const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN; - const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN; - const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN; - const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN; - const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN; - const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN; - const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN; - const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN; - const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; - const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; - const real_t tmp_coords_jac_18_WHITE_DOWN = 1.0 / (jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN); - const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN); - const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN); - const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN); - const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); - const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; @@ -978,28 +1022,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( real_t q_acc_3_29 = 0.0; for (int64_t q = 0; q < 5; q += 1) { - const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; - const real_t tmp_qloop_1 = -rayVertex_0; - const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; - const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; - const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; - const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; - const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; - const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; - const real_t tmp_qloop_8 = -rayVertex_1; - const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; - const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; - const real_t tmp_qloop_12 = -rayVertex_2; - const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; - const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; - const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; - const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; - const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); - const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; - const real_t tmp_qloop_21 = radRayVertex - radRefVertex; const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; @@ -1010,28 +1034,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; - const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; - const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); const real_t tmp_qloop_38 = tmp_qloop_37*1.0; - const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; - const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; - const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; - const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_47 = -tmp_qloop_28; const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; - const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; @@ -1039,11 +1056,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; - const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_69 = tmp_qloop_18*2.0; const real_t tmp_qloop_70 = -tmp_qloop_41; const real_t tmp_qloop_71 = tmp_qloop_35*2.0; - const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; @@ -1105,14 +1120,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; const real_t tmp_qloop_133 = tmp_qloop_103*4.0; const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; - const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; - const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); - const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; - const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; - const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; - const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; - const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; @@ -1130,6 +1138,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_WHITE_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); @@ -1439,67 +1454,87 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; } } + const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP; + const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP; + const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP; + const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP; + const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP; + const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP; + const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP; + const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP; + const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP; + const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP; + const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP; + const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP; + const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP; + const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP; + const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP; + const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP; + const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP; + const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP; + const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP; + const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP; + const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP; + const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP; + const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP; + const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP; + const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP; + const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP; + const real_t tmp_coords_jac_12_BLUE_UP = jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP; + const real_t tmp_coords_jac_13_BLUE_UP = 1.0 / (tmp_coords_jac_12_BLUE_UP); + const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP); + const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP); + const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP); + const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP); + const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP); + const real_t abs_det_jac_affine_BLUE_UP = abs(tmp_coords_jac_12_BLUE_UP); { /* CellType.BLUE_UP */ - const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP; - const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP; - const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP; - const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP; - const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP; - const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP; - const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP; - const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP; - const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP; - const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP; - const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP; - const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP; - const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP; - const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP; - const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP; - const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP; - const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP; - const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP; - const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP; - const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP; - const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP; - const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP; - const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP; - const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP; - const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP; - const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP; - const real_t tmp_coords_jac_12_BLUE_UP = 1.0 / (jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP); - const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP); - const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP); - const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP); - const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP); - const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; @@ -1662,28 +1697,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( real_t q_acc_3_29 = 0.0; for (int64_t q = 0; q < 5; q += 1) { - const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; - const real_t tmp_qloop_1 = -rayVertex_0; - const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; - const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; - const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; - const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; - const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; - const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; - const real_t tmp_qloop_8 = -rayVertex_1; - const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; - const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; - const real_t tmp_qloop_12 = -rayVertex_2; - const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; - const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; - const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; - const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; - const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); - const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; - const real_t tmp_qloop_21 = radRayVertex - radRefVertex; const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; @@ -1694,28 +1709,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; - const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; - const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); const real_t tmp_qloop_38 = tmp_qloop_37*1.0; - const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; - const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; - const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; - const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_47 = -tmp_qloop_28; const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; - const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; @@ -1723,11 +1731,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; - const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_69 = tmp_qloop_18*2.0; const real_t tmp_qloop_70 = -tmp_qloop_41; const real_t tmp_qloop_71 = tmp_qloop_35*2.0; - const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; @@ -1789,14 +1795,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; const real_t tmp_qloop_133 = tmp_qloop_103*4.0; const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; - const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; - const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); - const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; - const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; - const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; - const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; - const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; @@ -1814,6 +1813,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_BLUE_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); @@ -2123,70 +2129,90 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; } } + const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN; + const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN; + const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN; + const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN; + const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN; + const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN; + const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN; + const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN; + const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN; + const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN; + const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN; + const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN; + const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN; + const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN; + const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN; + const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN; + const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN; + const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN; + const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN; + const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN; + const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN; + const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN; + const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN; + const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN; + const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN; + const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN; + const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_15_BLUE_DOWN = jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN; + const real_t tmp_coords_jac_16_BLUE_DOWN = 1.0 / (tmp_coords_jac_15_BLUE_DOWN); + const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN); + const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN); + const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN); + const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); + const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); + const real_t abs_det_jac_affine_BLUE_DOWN = abs(tmp_coords_jac_15_BLUE_DOWN); { /* CellType.BLUE_DOWN */ - const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN; - const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN; - const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN; - const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN; - const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN; - const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN; - const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN; - const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN; - const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN; - const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN; - const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN; - const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN; - const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN; - const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN; - const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN; - const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN; - const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN; - const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN; - const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN; - const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN; - const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN; - const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN; - const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN; - const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN; - const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN; - const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN; - const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN; - const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; - const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; - const real_t tmp_coords_jac_15_BLUE_DOWN = 1.0 / (jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN); - const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN); - const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN); - const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN); - const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); - const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; @@ -2349,28 +2375,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( real_t q_acc_3_29 = 0.0; for (int64_t q = 0; q < 5; q += 1) { - const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; - const real_t tmp_qloop_1 = -rayVertex_0; - const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; - const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; - const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; - const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; - const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; - const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; - const real_t tmp_qloop_8 = -rayVertex_1; - const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; - const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; - const real_t tmp_qloop_12 = -rayVertex_2; - const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; - const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; - const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; - const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; - const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); - const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; - const real_t tmp_qloop_21 = radRayVertex - radRefVertex; const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; @@ -2381,28 +2387,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; - const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; - const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); const real_t tmp_qloop_38 = tmp_qloop_37*1.0; - const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; - const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; - const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; - const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_47 = -tmp_qloop_28; const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; - const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; @@ -2410,11 +2409,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; - const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_69 = tmp_qloop_18*2.0; const real_t tmp_qloop_70 = -tmp_qloop_41; const real_t tmp_qloop_71 = tmp_qloop_35*2.0; - const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; @@ -2476,14 +2473,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; const real_t tmp_qloop_133 = tmp_qloop_103*4.0; const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; - const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; - const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); - const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; - const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; - const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; - const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; - const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; @@ -2501,6 +2491,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_BLUE_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); @@ -2810,67 +2807,87 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; } } + const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP; + const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP; + const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP; + const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP; + const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP; + const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP; + const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP; + const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP; + const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP; + const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP; + const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP; + const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP; + const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP; + const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP; + const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP; + const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP; + const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP; + const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP; + const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP; + const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP; + const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP; + const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP; + const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP; + const real_t tmp_coords_jac_12_GREEN_UP = jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP; + const real_t tmp_coords_jac_13_GREEN_UP = 1.0 / (tmp_coords_jac_12_GREEN_UP); + const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP); + const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP); + const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP); + const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP); + const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP); + const real_t abs_det_jac_affine_GREEN_UP = abs(tmp_coords_jac_12_GREEN_UP); { /* CellType.GREEN_UP */ - const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP; - const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP; - const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP; - const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP; - const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP; - const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP; - const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP; - const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP; - const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP; - const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP; - const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP; - const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP; - const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP; - const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP; - const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP; - const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP; - const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP; - const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP; - const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP; - const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP; - const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP; - const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP; - const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP; - const real_t tmp_coords_jac_12_GREEN_UP = 1.0 / (jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP); - const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP); - const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP); - const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP); - const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP); - const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; @@ -3033,28 +3050,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( real_t q_acc_3_29 = 0.0; for (int64_t q = 0; q < 5; q += 1) { - const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; - const real_t tmp_qloop_1 = -rayVertex_0; - const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; - const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; - const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; - const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; - const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; - const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; - const real_t tmp_qloop_8 = -rayVertex_1; - const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; - const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; - const real_t tmp_qloop_12 = -rayVertex_2; - const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; - const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; - const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; - const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; - const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); - const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; - const real_t tmp_qloop_21 = radRayVertex - radRefVertex; const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; @@ -3065,28 +3062,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; - const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; - const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); const real_t tmp_qloop_38 = tmp_qloop_37*1.0; - const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; - const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; - const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; - const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_47 = -tmp_qloop_28; const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; - const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; @@ -3094,11 +3084,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; - const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_69 = tmp_qloop_18*2.0; const real_t tmp_qloop_70 = -tmp_qloop_41; const real_t tmp_qloop_71 = tmp_qloop_35*2.0; - const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; @@ -3160,14 +3148,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; const real_t tmp_qloop_133 = tmp_qloop_103*4.0; const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; - const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; - const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); - const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; - const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; - const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; - const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; - const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; @@ -3185,6 +3166,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_GREEN_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); @@ -3494,70 +3482,90 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; } } + const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN; + const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN; + const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN; + const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN; + const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN; + const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN; + const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; + const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; + const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; + const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; + const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; + const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; + const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN; + const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN; + const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN; + const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN; + const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN; + const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN; + const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN; + const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN; + const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN; + const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN; + const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN; + const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_15_GREEN_DOWN = jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN; + const real_t tmp_coords_jac_16_GREEN_DOWN = 1.0 / (tmp_coords_jac_15_GREEN_DOWN); + const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN); + const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN); + const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN); + const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); + const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); + const real_t abs_det_jac_affine_GREEN_DOWN = abs(tmp_coords_jac_15_GREEN_DOWN); { /* CellType.GREEN_DOWN */ - const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN; - const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN; - const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN; - const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN; - const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN; - const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN; - const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; - const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; - const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; - const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; - const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; - const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; - const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN; - const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN; - const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN; - const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN; - const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN; - const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN; - const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN; - const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN; - const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN; - const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN; - const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN; - const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN; - const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; - const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; - const real_t tmp_coords_jac_15_GREEN_DOWN = 1.0 / (jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN); - const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN); - const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN); - const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN); - const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); - const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; @@ -3720,28 +3728,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( real_t q_acc_3_29 = 0.0; for (int64_t q = 0; q < 5; q += 1) { - const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; - const real_t tmp_qloop_1 = -rayVertex_0; - const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; - const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; - const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; - const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; - const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; - const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; - const real_t tmp_qloop_8 = -rayVertex_1; - const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; - const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; - const real_t tmp_qloop_12 = -rayVertex_2; - const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; - const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; - const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; - const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; - const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); - const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; - const real_t tmp_qloop_21 = radRayVertex - radRefVertex; const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; @@ -3752,28 +3740,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; - const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; - const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); const real_t tmp_qloop_38 = tmp_qloop_37*1.0; - const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; - const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; - const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; - const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_47 = -tmp_qloop_28; const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; - const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; @@ -3781,11 +3762,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; - const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_69 = tmp_qloop_18*2.0; const real_t tmp_qloop_70 = -tmp_qloop_41; const real_t tmp_qloop_71 = tmp_qloop_35*2.0; - const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; @@ -3847,14 +3826,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; const real_t tmp_qloop_133 = tmp_qloop_103*4.0; const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; - const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; - const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); - const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; - const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; - const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; - const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; - const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; @@ -3872,6 +3844,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::apply_macro_3D( const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_GREEN_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); diff --git a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_macro_3D.cpp b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp similarity index 84% rename from operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_macro_3D.cpp rename to operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp index 05c564b4..06750a93 100644 --- a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_macro_3D.cpp +++ b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D.cpp @@ -47,6 +47,10 @@ + + + + @@ -60,7 +64,7 @@ namespace hyteg { namespace operatorgeneration { -void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, idx_t * RESTRICT _data_src_edge_0, idx_t * RESTRICT _data_src_edge_1, idx_t * RESTRICT _data_src_edge_2, idx_t * RESTRICT _data_src_vertex_0, idx_t * RESTRICT _data_src_vertex_1, idx_t * RESTRICT _data_src_vertex_2, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, idx_t * RESTRICT _data_src_edge_0, idx_t * RESTRICT _data_src_edge_1, idx_t * RESTRICT _data_src_edge_2, idx_t * RESTRICT _data_src_vertex_0, idx_t * RESTRICT _data_src_vertex_1, idx_t * RESTRICT _data_src_vertex_2, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; @@ -71,61 +75,110 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t _data_q_p_2 [] = {0.25, 0.5, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; + const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2; + const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP; + const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP; + const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP; + const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP; + const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP; + const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP; + const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP; + const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP; + const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP; + const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP; + const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP; + const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP; + const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP; + const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP; + const real_t tmp_coords_jac_6_WHITE_UP = jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP; + const real_t tmp_coords_jac_7_WHITE_UP = 1.0 / (tmp_coords_jac_6_WHITE_UP); + const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP); + const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP); + const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP); + const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP); + const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP); + const real_t abs_det_jac_affine_WHITE_UP = abs(tmp_coords_jac_6_WHITE_UP); + const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; + const real_t tmp_qloop_1 = -rayVertex_0; + const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; + const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; + const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; + const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; + const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; + const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; + const real_t tmp_qloop_8 = -rayVertex_1; + const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; + const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; + const real_t tmp_qloop_12 = -rayVertex_2; + const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; + const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; + const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; + const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; + const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); + const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; + const real_t tmp_qloop_21 = radRayVertex - radRefVertex; + const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; + const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; + const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; + const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; + const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; + const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; + const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; + const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; + const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; { /* CellType.WHITE_UP */ - const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0; - const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1; - const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2; - const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP; - const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP; - const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP; - const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP; - const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP; - const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP; - const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP; - const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP; - const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP; - const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP; - const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP; - const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP; - const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP; - const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP; - const real_t tmp_coords_jac_6_WHITE_UP = 1.0 / (jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP); - const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP); - const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP); - const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP); - const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP); - const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; @@ -258,28 +311,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ real_t q_acc_3_29 = 0.0; for (int64_t q = 0; q < 5; q += 1) { - const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; - const real_t tmp_qloop_1 = -rayVertex_0; - const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; - const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; - const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; - const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; - const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; - const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; - const real_t tmp_qloop_8 = -rayVertex_1; - const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; - const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; - const real_t tmp_qloop_12 = -rayVertex_2; - const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; - const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; - const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; - const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; - const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); - const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; - const real_t tmp_qloop_21 = radRayVertex - radRefVertex; const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; @@ -290,28 +323,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; - const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; - const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); const real_t tmp_qloop_38 = tmp_qloop_37*1.0; - const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; - const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; - const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; - const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_47 = -tmp_qloop_28; const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; - const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; @@ -319,11 +345,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; - const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_69 = tmp_qloop_18*2.0; const real_t tmp_qloop_70 = -tmp_qloop_41; const real_t tmp_qloop_71 = tmp_qloop_35*2.0; - const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; @@ -385,14 +409,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; const real_t tmp_qloop_133 = tmp_qloop_103*4.0; const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; - const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; - const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); - const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; - const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; - const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; - const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; - const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; @@ -410,6 +427,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_WHITE_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); @@ -998,73 +1022,93 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); } } + const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; + const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; + const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; + const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN; + const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN; + const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN; + const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; + const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; + const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; + const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN; + const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN; + const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN; + const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN; + const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN; + const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN; + const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN; + const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN; + const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN; + const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN; + const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN; + const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN; + const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN; + const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN; + const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN; + const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN; + const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN; + const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_18_WHITE_DOWN = jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN; + const real_t tmp_coords_jac_19_WHITE_DOWN = 1.0 / (tmp_coords_jac_18_WHITE_DOWN); + const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN); + const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN); + const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN); + const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); + const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); + const real_t abs_det_jac_affine_WHITE_DOWN = abs(tmp_coords_jac_18_WHITE_DOWN); { /* CellType.WHITE_DOWN */ - const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; - const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; - const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; - const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN; - const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN; - const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN; - const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; - const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; - const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; - const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN; - const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN; - const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN; - const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN; - const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN; - const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN; - const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN; - const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN; - const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN; - const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN; - const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN; - const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN; - const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN; - const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN; - const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN; - const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN; - const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN; - const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN; - const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; - const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; - const real_t tmp_coords_jac_18_WHITE_DOWN = 1.0 / (jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN); - const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN); - const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN); - const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN); - const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); - const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; @@ -1197,28 +1241,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ real_t q_acc_3_29 = 0.0; for (int64_t q = 0; q < 5; q += 1) { - const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; - const real_t tmp_qloop_1 = -rayVertex_0; - const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; - const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; - const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; - const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; - const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; - const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; - const real_t tmp_qloop_8 = -rayVertex_1; - const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; - const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; - const real_t tmp_qloop_12 = -rayVertex_2; - const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; - const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; - const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; - const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; - const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); - const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; - const real_t tmp_qloop_21 = radRayVertex - radRefVertex; const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; @@ -1229,28 +1253,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; - const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; - const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); const real_t tmp_qloop_38 = tmp_qloop_37*1.0; - const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; - const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; - const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; - const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_47 = -tmp_qloop_28; const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; - const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; @@ -1258,11 +1275,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; - const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_69 = tmp_qloop_18*2.0; const real_t tmp_qloop_70 = -tmp_qloop_41; const real_t tmp_qloop_71 = tmp_qloop_35*2.0; - const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; @@ -1324,14 +1339,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; const real_t tmp_qloop_133 = tmp_qloop_103*4.0; const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; - const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; - const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); - const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; - const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; - const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; - const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; - const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; @@ -1349,6 +1357,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_WHITE_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); @@ -1937,67 +1952,87 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); } } + const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP; + const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP; + const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP; + const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP; + const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP; + const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP; + const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP; + const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP; + const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP; + const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP; + const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP; + const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP; + const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP; + const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP; + const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP; + const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP; + const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP; + const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP; + const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP; + const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP; + const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP; + const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP; + const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP; + const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP; + const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP; + const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP; + const real_t tmp_coords_jac_12_BLUE_UP = jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP; + const real_t tmp_coords_jac_13_BLUE_UP = 1.0 / (tmp_coords_jac_12_BLUE_UP); + const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP); + const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP); + const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP); + const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP); + const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP); + const real_t abs_det_jac_affine_BLUE_UP = abs(tmp_coords_jac_12_BLUE_UP); { /* CellType.BLUE_UP */ - const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP; - const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP; - const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP; - const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP; - const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP; - const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP; - const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP; - const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP; - const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP; - const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP; - const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP; - const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP; - const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP; - const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP; - const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP; - const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP; - const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP; - const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP; - const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP; - const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP; - const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP; - const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP; - const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP; - const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP; - const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP; - const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP; - const real_t tmp_coords_jac_12_BLUE_UP = 1.0 / (jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP); - const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP); - const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP); - const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP); - const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP); - const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; @@ -2130,28 +2165,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ real_t q_acc_3_29 = 0.0; for (int64_t q = 0; q < 5; q += 1) { - const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; - const real_t tmp_qloop_1 = -rayVertex_0; - const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; - const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; - const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; - const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; - const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; - const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; - const real_t tmp_qloop_8 = -rayVertex_1; - const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; - const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; - const real_t tmp_qloop_12 = -rayVertex_2; - const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; - const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; - const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; - const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; - const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); - const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; - const real_t tmp_qloop_21 = radRayVertex - radRefVertex; const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; @@ -2162,28 +2177,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; - const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; - const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); const real_t tmp_qloop_38 = tmp_qloop_37*1.0; - const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; - const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; - const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; - const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_47 = -tmp_qloop_28; const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; - const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; @@ -2191,11 +2199,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; - const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_69 = tmp_qloop_18*2.0; const real_t tmp_qloop_70 = -tmp_qloop_41; const real_t tmp_qloop_71 = tmp_qloop_35*2.0; - const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; @@ -2257,14 +2263,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; const real_t tmp_qloop_133 = tmp_qloop_103*4.0; const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; - const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; - const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); - const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; - const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; - const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; - const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; - const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; @@ -2282,6 +2281,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_BLUE_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); @@ -2870,70 +2876,90 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); } } + const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN; + const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN; + const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN; + const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN; + const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN; + const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN; + const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN; + const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN; + const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN; + const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN; + const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN; + const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN; + const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN; + const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN; + const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN; + const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN; + const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN; + const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN; + const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN; + const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN; + const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN; + const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN; + const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN; + const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN; + const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN; + const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN; + const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_15_BLUE_DOWN = jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN; + const real_t tmp_coords_jac_16_BLUE_DOWN = 1.0 / (tmp_coords_jac_15_BLUE_DOWN); + const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN); + const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN); + const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN); + const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); + const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); + const real_t abs_det_jac_affine_BLUE_DOWN = abs(tmp_coords_jac_15_BLUE_DOWN); { /* CellType.BLUE_DOWN */ - const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN; - const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN; - const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN; - const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN; - const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN; - const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN; - const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN; - const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN; - const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN; - const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN; - const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN; - const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN; - const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN; - const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN; - const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN; - const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN; - const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN; - const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN; - const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN; - const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN; - const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN; - const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN; - const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN; - const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN; - const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN; - const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN; - const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN; - const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; - const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; - const real_t tmp_coords_jac_15_BLUE_DOWN = 1.0 / (jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN); - const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN); - const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN); - const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN); - const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); - const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; @@ -3066,28 +3092,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ real_t q_acc_3_29 = 0.0; for (int64_t q = 0; q < 5; q += 1) { - const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; - const real_t tmp_qloop_1 = -rayVertex_0; - const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; - const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; - const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; - const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; - const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; - const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; - const real_t tmp_qloop_8 = -rayVertex_1; - const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; - const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; - const real_t tmp_qloop_12 = -rayVertex_2; - const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; - const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; - const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; - const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; - const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); - const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; - const real_t tmp_qloop_21 = radRayVertex - radRefVertex; const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; @@ -3098,28 +3104,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; - const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; - const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); const real_t tmp_qloop_38 = tmp_qloop_37*1.0; - const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; - const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; - const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; - const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_47 = -tmp_qloop_28; const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; - const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; @@ -3127,11 +3126,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; - const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_69 = tmp_qloop_18*2.0; const real_t tmp_qloop_70 = -tmp_qloop_41; const real_t tmp_qloop_71 = tmp_qloop_35*2.0; - const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; @@ -3193,14 +3190,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; const real_t tmp_qloop_133 = tmp_qloop_103*4.0; const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; - const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; - const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); - const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; - const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; - const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; - const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; - const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; @@ -3218,6 +3208,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_BLUE_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); @@ -3806,67 +3803,87 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); } } + const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP; + const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP; + const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP; + const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP; + const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP; + const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP; + const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP; + const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP; + const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP; + const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP; + const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP; + const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP; + const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP; + const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP; + const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP; + const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP; + const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP; + const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP; + const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP; + const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP; + const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP; + const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP; + const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP; + const real_t tmp_coords_jac_12_GREEN_UP = jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP; + const real_t tmp_coords_jac_13_GREEN_UP = 1.0 / (tmp_coords_jac_12_GREEN_UP); + const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP); + const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP); + const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP); + const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP); + const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP); + const real_t abs_det_jac_affine_GREEN_UP = abs(tmp_coords_jac_12_GREEN_UP); { /* CellType.GREEN_UP */ - const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP; - const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP; - const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP; - const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP; - const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP; - const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP; - const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP; - const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP; - const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP; - const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP; - const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP; - const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP; - const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP; - const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP; - const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP; - const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP; - const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP; - const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP; - const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP; - const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP; - const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP; - const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP; - const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP; - const real_t tmp_coords_jac_12_GREEN_UP = 1.0 / (jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP); - const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP); - const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP); - const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP); - const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP); - const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; @@ -3999,28 +4016,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ real_t q_acc_3_29 = 0.0; for (int64_t q = 0; q < 5; q += 1) { - const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; - const real_t tmp_qloop_1 = -rayVertex_0; - const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; - const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; - const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; - const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; - const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; - const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; - const real_t tmp_qloop_8 = -rayVertex_1; - const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; - const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; - const real_t tmp_qloop_12 = -rayVertex_2; - const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; - const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; - const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; - const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; - const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); - const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; - const real_t tmp_qloop_21 = radRayVertex - radRefVertex; const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; @@ -4031,28 +4028,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; - const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; - const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); const real_t tmp_qloop_38 = tmp_qloop_37*1.0; - const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; - const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; - const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; - const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_47 = -tmp_qloop_28; const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; - const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; @@ -4060,11 +4050,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; - const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_69 = tmp_qloop_18*2.0; const real_t tmp_qloop_70 = -tmp_qloop_41; const real_t tmp_qloop_71 = tmp_qloop_35*2.0; - const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; @@ -4126,14 +4114,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; const real_t tmp_qloop_133 = tmp_qloop_103*4.0; const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; - const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; - const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); - const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; - const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; - const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; - const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; - const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; @@ -4151,6 +4132,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_GREEN_UP*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); @@ -4739,70 +4727,90 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); } } + const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN; + const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN; + const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN; + const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN; + const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN; + const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN; + const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; + const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; + const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; + const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; + const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; + const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; + const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN; + const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN; + const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN; + const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN; + const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN; + const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN; + const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN; + const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN; + const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN; + const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN; + const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN; + const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_15_GREEN_DOWN = jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN; + const real_t tmp_coords_jac_16_GREEN_DOWN = 1.0 / (tmp_coords_jac_15_GREEN_DOWN); + const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN); + const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN); + const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN); + const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); + const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); + const real_t abs_det_jac_affine_GREEN_DOWN = abs(tmp_coords_jac_15_GREEN_DOWN); { /* CellType.GREEN_DOWN */ - const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN; - const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN; - const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN; - const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN; - const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN; - const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN; - const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; - const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; - const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; - const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; - const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; - const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; - const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN; - const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN; - const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN; - const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN; - const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN; - const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN; - const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN; - const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN; - const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN; - const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN; - const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN; - const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN; - const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; - const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; - const real_t tmp_coords_jac_15_GREEN_DOWN = 1.0 / (jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN); - const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN); - const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN); - const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN); - const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); - const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; @@ -4935,28 +4943,8 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ real_t q_acc_3_29 = 0.0; for (int64_t q = 0; q < 5; q += 1) { - const real_t tmp_qloop_0 = rayVertex_1 - refVertex_1; - const real_t tmp_qloop_1 = -rayVertex_0; - const real_t tmp_qloop_2 = -forVertex_0 - tmp_qloop_1; - const real_t tmp_qloop_3 = rayVertex_2 - thrVertex_2; - const real_t tmp_qloop_4 = tmp_qloop_2*tmp_qloop_3; - const real_t tmp_qloop_5 = rayVertex_2 - refVertex_2; - const real_t tmp_qloop_6 = rayVertex_1 - thrVertex_1; - const real_t tmp_qloop_7 = tmp_qloop_2*tmp_qloop_6; - const real_t tmp_qloop_8 = -rayVertex_1; - const real_t tmp_qloop_9 = -forVertex_1 - tmp_qloop_8; - const real_t tmp_qloop_10 = rayVertex_0 - thrVertex_0; - const real_t tmp_qloop_11 = rayVertex_0 - refVertex_0; - const real_t tmp_qloop_12 = -rayVertex_2; - const real_t tmp_qloop_13 = -forVertex_2 - tmp_qloop_12; - const real_t tmp_qloop_14 = tmp_qloop_3*tmp_qloop_9; - const real_t tmp_qloop_15 = tmp_qloop_10*tmp_qloop_13; - const real_t tmp_qloop_16 = -tmp_qloop_0*tmp_qloop_15 + tmp_qloop_0*tmp_qloop_4 + tmp_qloop_10*tmp_qloop_5*tmp_qloop_9 + tmp_qloop_11*tmp_qloop_13*tmp_qloop_6 - tmp_qloop_11*tmp_qloop_14 - tmp_qloop_5*tmp_qloop_7; - const real_t tmp_qloop_17 = 1.0 / (tmp_qloop_16); const real_t tmp_qloop_18 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q] + (-p_affine_0_0 + p_affine_3_0)*_data_q_p_2[q]; const real_t tmp_qloop_19 = (tmp_qloop_18*tmp_qloop_18); - const real_t tmp_qloop_20 = radRayVertex*tmp_qloop_16; - const real_t tmp_qloop_21 = radRayVertex - radRefVertex; const real_t tmp_qloop_22 = p_affine_0_2 + (-p_affine_0_2 + p_affine_1_2)*_data_q_p_0[q] + (-p_affine_0_2 + p_affine_2_2)*_data_q_p_1[q] + (-p_affine_0_2 + p_affine_3_2)*_data_q_p_2[q]; const real_t tmp_qloop_23 = -tmp_qloop_12 - tmp_qloop_22; const real_t tmp_qloop_24 = -tmp_qloop_1 - tmp_qloop_18; @@ -4967,28 +4955,21 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_29 = (tmp_qloop_25*tmp_qloop_25); const real_t tmp_qloop_30 = (tmp_qloop_22*tmp_qloop_22); const real_t tmp_qloop_31 = tmp_qloop_19 + tmp_qloop_29 + tmp_qloop_30; - const real_t tmp_qloop_32 = -tmp_qloop_13*tmp_qloop_6 + tmp_qloop_14; - const real_t tmp_qloop_33 = -tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_33; const real_t tmp_qloop_35 = tmp_qloop_28 + tmp_qloop_34; const real_t tmp_qloop_36 = tmp_qloop_17*(-tmp_qloop_19*tmp_qloop_28 + tmp_qloop_31*tmp_qloop_35); const real_t tmp_qloop_37 = pow(tmp_qloop_31, -1.5000000000000000); const real_t tmp_qloop_38 = tmp_qloop_37*1.0; - const real_t tmp_qloop_39 = tmp_qloop_10*tmp_qloop_13 - tmp_qloop_4; - const real_t tmp_qloop_40 = tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_41 = tmp_qloop_31*tmp_qloop_40; const real_t tmp_qloop_42 = -tmp_qloop_25*tmp_qloop_28 - tmp_qloop_41; const real_t tmp_qloop_43 = tmp_qloop_17*tmp_qloop_38; const real_t tmp_qloop_44 = tmp_qloop_42*tmp_qloop_43; - const real_t tmp_qloop_45 = -tmp_qloop_10*tmp_qloop_9 + tmp_qloop_7; - const real_t tmp_qloop_46 = -tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_47 = -tmp_qloop_28; const real_t tmp_qloop_48 = tmp_qloop_22*tmp_qloop_47; const real_t tmp_qloop_49 = tmp_qloop_31*tmp_qloop_46 + tmp_qloop_48; const real_t tmp_qloop_50 = tmp_qloop_43*tmp_qloop_49; const real_t tmp_qloop_51 = tmp_qloop_18*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_33; const real_t tmp_qloop_52 = tmp_qloop_43*tmp_qloop_51; - const real_t tmp_qloop_53 = -tmp_qloop_21*tmp_qloop_39; const real_t tmp_qloop_54 = tmp_qloop_25*tmp_qloop_53 + tmp_qloop_28; const real_t tmp_qloop_55 = tmp_qloop_29*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_54; const real_t tmp_qloop_56 = tmp_qloop_22*tmp_qloop_46; @@ -4996,11 +4977,9 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_58 = tmp_qloop_30*tmp_qloop_47 + tmp_qloop_31*tmp_qloop_57; const real_t tmp_qloop_66 = pow(tmp_qloop_31, -2.5000000000000000)*3.0; const real_t tmp_qloop_67 = tmp_qloop_36*tmp_qloop_66; - const real_t tmp_qloop_68 = tmp_qloop_21*tmp_qloop_32; const real_t tmp_qloop_69 = tmp_qloop_18*2.0; const real_t tmp_qloop_70 = -tmp_qloop_41; const real_t tmp_qloop_71 = tmp_qloop_35*2.0; - const real_t tmp_qloop_72 = tmp_qloop_21*tmp_qloop_45; const real_t tmp_qloop_73 = -tmp_qloop_31*tmp_qloop_72; const real_t tmp_qloop_74 = -tmp_qloop_20 + tmp_qloop_27; const real_t tmp_qloop_75 = tmp_qloop_18*tmp_qloop_68 + tmp_qloop_34*2.0 + tmp_qloop_74; @@ -5062,14 +5041,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_132 = tmp_qloop_109 - tmp_qloop_110 - tmp_qloop_112 - tmp_qloop_131; const real_t tmp_qloop_133 = tmp_qloop_103*4.0; const real_t tmp_qloop_134 = tmp_qloop_111 - tmp_qloop_114 - tmp_qloop_133; - const real_t tmp_qloop_135 = 1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; - const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); - const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; - const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; - const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; const real_t tmp_qloop_158 = tmp_qloop_105*tmp_qloop_111; - const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; - const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_0_0 = tmp_qloop_36*tmp_qloop_38; const real_t jac_blending_0_1 = tmp_qloop_18*tmp_qloop_44; const real_t jac_blending_0_2 = tmp_qloop_18*tmp_qloop_50; @@ -5087,6 +5059,13 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotUIcosahedralShellMap::toMatrix_macro_ const real_t tmp_qloop_64 = jac_blending_0_0*tmp_qloop_59 - jac_blending_0_0*tmp_qloop_61 + jac_blending_0_2*jac_blending_1_0*jac_blending_2_1 - jac_blending_1_0*tmp_qloop_62 + jac_blending_2_0*tmp_qloop_60 - jac_blending_2_0*tmp_qloop_63; const real_t tmp_qloop_65 = 1.0 / (tmp_qloop_64); const real_t abs_det_jac_blending = tmp_qloop_64; + const real_t tmp_qloop_135 = abs_det_jac_affine_GREEN_DOWN*abs_det_jac_blending*1.0 / (rho_dof_0*tmp_qloop_115 + rho_dof_1*tmp_qloop_127 + rho_dof_2*tmp_qloop_128 + rho_dof_3*tmp_qloop_129 + rho_dof_4*tmp_qloop_110 + rho_dof_5*tmp_qloop_113 + rho_dof_6*tmp_qloop_112 + rho_dof_7*tmp_qloop_130 + rho_dof_8*tmp_qloop_132 + rho_dof_9*tmp_qloop_134)*_data_q_w[q]; + const real_t tmp_qloop_136 = tmp_qloop_135*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); + const real_t tmp_qloop_142 = tmp_qloop_135*_data_q_p_0[q]; + const real_t tmp_qloop_144 = tmp_qloop_135*_data_q_p_2[q]; + const real_t tmp_qloop_147 = tmp_qloop_135*_data_q_p_1[q]; + const real_t tmp_qloop_159 = tmp_qloop_135*tmp_qloop_158; + const real_t tmp_qloop_161 = tmp_qloop_107*tmp_qloop_135; const real_t jac_blending_inv_0_0 = tmp_qloop_65*(tmp_qloop_59 - tmp_qloop_61); const real_t jac_blending_inv_0_1 = tmp_qloop_65*(jac_blending_0_2*jac_blending_2_1 - tmp_qloop_62); const real_t jac_blending_inv_0_2 = tmp_qloop_65*(tmp_qloop_60 - tmp_qloop_63); diff --git a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_2D.cpp b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp similarity index 74% rename from operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_2D.cpp rename to operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp index a161a3a8..f1f6f39b 100644 --- a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_2D.cpp +++ b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, real_t * RESTRICT _data_src_edge_0, real_t * RESTRICT _data_src_edge_1, real_t * RESTRICT _data_src_vertex_0, real_t * RESTRICT _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, real_t * RESTRICT _data_src_edge_0, real_t * RESTRICT _data_src_edge_1, real_t * RESTRICT _data_src_vertex_0, real_t * RESTRICT _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; @@ -67,33 +71,48 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_2D( real_t * RESTRICT const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); { /* FaceType.GRAY */ - const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; - const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; - const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; - const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; - const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; - const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; - const real_t tmp_coords_jac_1_GRAY = 1.0 / (jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY); - const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_1_GRAY; - const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_1_GRAY; - const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_1_GRAY; - const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_1_GRAY; for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; @@ -167,7 +186,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_2D( real_t * RESTRICT const real_t tmp_qloop_14 = -tmp_qloop_1 + tmp_qloop_4*-4.0 + tmp_qloop_7; const real_t tmp_qloop_15 = tmp_qloop_2*4.0; const real_t tmp_qloop_16 = tmp_qloop_0 - tmp_qloop_1 - tmp_qloop_15; - const real_t tmp_qloop_17 = 1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q]; + const real_t tmp_qloop_17 = abs_det_jac_affine_GRAY*1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q]; const real_t tmp_qloop_18 = tmp_qloop_17*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); const real_t tmp_qloop_19 = tmp_qloop_11*tmp_qloop_18; const real_t tmp_qloop_20 = jac_affine_inv_0_1_GRAY*tmp_qloop_9 + jac_affine_inv_1_1_GRAY*tmp_qloop_10; @@ -260,37 +279,52 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_2D( real_t * RESTRICT _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dst[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; } } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); { /* FaceType.BLUE */ - const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; - const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; - const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; - const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; - const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; - const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; - const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; - const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; - const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; - const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; - const real_t tmp_coords_jac_5_BLUE = 1.0 / (jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE); - const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_5_BLUE; - const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_5_BLUE; - const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_5_BLUE; - const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_5_BLUE; for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; @@ -364,7 +398,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_2D( real_t * RESTRICT const real_t tmp_qloop_14 = -tmp_qloop_1 + tmp_qloop_4*-4.0 + tmp_qloop_7; const real_t tmp_qloop_15 = tmp_qloop_2*4.0; const real_t tmp_qloop_16 = tmp_qloop_0 - tmp_qloop_1 - tmp_qloop_15; - const real_t tmp_qloop_17 = 1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q]; + const real_t tmp_qloop_17 = abs_det_jac_affine_BLUE*1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q]; const real_t tmp_qloop_18 = tmp_qloop_17*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); const real_t tmp_qloop_19 = tmp_qloop_11*tmp_qloop_18; const real_t tmp_qloop_20 = jac_affine_inv_0_1_BLUE*tmp_qloop_9 + jac_affine_inv_1_1_BLUE*tmp_qloop_10; diff --git a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_3D.cpp b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp similarity index 78% rename from operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_3D.cpp rename to operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp index 45073514..f8bf62a3 100644 --- a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_macro_3D.cpp +++ b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, real_t * RESTRICT _data_src_edge_0, real_t * RESTRICT _data_src_edge_1, real_t * RESTRICT _data_src_edge_2, real_t * RESTRICT _data_src_vertex_0, real_t * RESTRICT _data_src_vertex_1, real_t * RESTRICT _data_src_vertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, real_t * RESTRICT _data_src_edge_0, real_t * RESTRICT _data_src_edge_1, real_t * RESTRICT _data_src_edge_2, real_t * RESTRICT _data_src_vertex_0, real_t * RESTRICT _data_src_vertex_1, real_t * RESTRICT _data_src_vertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; @@ -69,61 +73,81 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT const real_t _data_q_p_2 [] = {0.25, 0.5, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; + const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2; + const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP; + const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP; + const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP; + const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP; + const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP; + const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP; + const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP; + const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP; + const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP; + const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP; + const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP; + const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP; + const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP; + const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP; + const real_t tmp_coords_jac_6_WHITE_UP = jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP; + const real_t tmp_coords_jac_7_WHITE_UP = 1.0 / (tmp_coords_jac_6_WHITE_UP); + const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP); + const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP); + const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP); + const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP); + const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP); + const real_t abs_det_jac_affine_WHITE_UP = abs(tmp_coords_jac_6_WHITE_UP); { /* CellType.WHITE_UP */ - const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0; - const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1; - const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2; - const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP; - const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP; - const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP; - const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP; - const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP; - const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP; - const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP; - const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP; - const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP; - const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP; - const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP; - const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP; - const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP; - const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP; - const real_t tmp_coords_jac_6_WHITE_UP = 1.0 / (jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP); - const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP); - const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP); - const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP); - const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP); - const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; @@ -318,7 +342,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; const real_t tmp_qloop_30 = tmp_qloop_0*4.0; const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; - const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_32 = abs_det_jac_affine_WHITE_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; const real_t tmp_qloop_35 = jac_affine_inv_0_1_WHITE_UP*tmp_qloop_18 + jac_affine_inv_1_1_WHITE_UP*tmp_qloop_21 + jac_affine_inv_2_1_WHITE_UP*tmp_qloop_22; @@ -598,73 +622,93 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; } } + const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; + const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; + const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; + const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN; + const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN; + const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN; + const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; + const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; + const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; + const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN; + const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN; + const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN; + const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN; + const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN; + const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN; + const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN; + const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN; + const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN; + const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN; + const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN; + const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN; + const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN; + const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN; + const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN; + const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN; + const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN; + const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_18_WHITE_DOWN = jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN; + const real_t tmp_coords_jac_19_WHITE_DOWN = 1.0 / (tmp_coords_jac_18_WHITE_DOWN); + const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN); + const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN); + const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN); + const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); + const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); + const real_t abs_det_jac_affine_WHITE_DOWN = abs(tmp_coords_jac_18_WHITE_DOWN); { /* CellType.WHITE_DOWN */ - const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; - const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; - const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; - const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN; - const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN; - const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN; - const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; - const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; - const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; - const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN; - const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN; - const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN; - const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN; - const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN; - const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN; - const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN; - const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN; - const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN; - const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN; - const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN; - const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN; - const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN; - const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN; - const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN; - const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN; - const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN; - const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN; - const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; - const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; - const real_t tmp_coords_jac_18_WHITE_DOWN = 1.0 / (jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN); - const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN); - const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN); - const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN); - const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); - const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; @@ -859,7 +903,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; const real_t tmp_qloop_30 = tmp_qloop_0*4.0; const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; - const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_32 = abs_det_jac_affine_WHITE_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; const real_t tmp_qloop_35 = jac_affine_inv_0_1_WHITE_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_WHITE_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_WHITE_DOWN*tmp_qloop_22; @@ -1139,67 +1183,87 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; } } + const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP; + const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP; + const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP; + const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP; + const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP; + const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP; + const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP; + const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP; + const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP; + const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP; + const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP; + const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP; + const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP; + const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP; + const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP; + const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP; + const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP; + const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP; + const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP; + const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP; + const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP; + const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP; + const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP; + const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP; + const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP; + const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP; + const real_t tmp_coords_jac_12_BLUE_UP = jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP; + const real_t tmp_coords_jac_13_BLUE_UP = 1.0 / (tmp_coords_jac_12_BLUE_UP); + const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP); + const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP); + const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP); + const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP); + const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP); + const real_t abs_det_jac_affine_BLUE_UP = abs(tmp_coords_jac_12_BLUE_UP); { /* CellType.BLUE_UP */ - const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP; - const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP; - const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP; - const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP; - const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP; - const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP; - const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP; - const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP; - const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP; - const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP; - const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP; - const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP; - const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP; - const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP; - const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP; - const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP; - const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP; - const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP; - const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP; - const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP; - const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP; - const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP; - const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP; - const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP; - const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP; - const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP; - const real_t tmp_coords_jac_12_BLUE_UP = 1.0 / (jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP); - const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP); - const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP); - const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP); - const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP); - const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; @@ -1394,7 +1458,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; const real_t tmp_qloop_30 = tmp_qloop_0*4.0; const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; - const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_32 = abs_det_jac_affine_BLUE_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; const real_t tmp_qloop_35 = jac_affine_inv_0_1_BLUE_UP*tmp_qloop_18 + jac_affine_inv_1_1_BLUE_UP*tmp_qloop_21 + jac_affine_inv_2_1_BLUE_UP*tmp_qloop_22; @@ -1674,70 +1738,90 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; } } + const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN; + const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN; + const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN; + const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN; + const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN; + const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN; + const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN; + const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN; + const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN; + const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN; + const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN; + const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN; + const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN; + const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN; + const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN; + const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN; + const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN; + const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN; + const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN; + const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN; + const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN; + const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN; + const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN; + const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN; + const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN; + const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN; + const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_15_BLUE_DOWN = jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN; + const real_t tmp_coords_jac_16_BLUE_DOWN = 1.0 / (tmp_coords_jac_15_BLUE_DOWN); + const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN); + const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN); + const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN); + const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); + const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); + const real_t abs_det_jac_affine_BLUE_DOWN = abs(tmp_coords_jac_15_BLUE_DOWN); { /* CellType.BLUE_DOWN */ - const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN; - const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN; - const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN; - const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN; - const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN; - const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN; - const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN; - const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN; - const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN; - const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN; - const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN; - const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN; - const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN; - const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN; - const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN; - const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN; - const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN; - const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN; - const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN; - const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN; - const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN; - const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN; - const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN; - const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN; - const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN; - const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN; - const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN; - const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; - const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; - const real_t tmp_coords_jac_15_BLUE_DOWN = 1.0 / (jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN); - const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN); - const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN); - const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN); - const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); - const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; @@ -1932,7 +2016,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; const real_t tmp_qloop_30 = tmp_qloop_0*4.0; const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; - const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_32 = abs_det_jac_affine_BLUE_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; const real_t tmp_qloop_35 = jac_affine_inv_0_1_BLUE_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_BLUE_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_BLUE_DOWN*tmp_qloop_22; @@ -2212,67 +2296,87 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))] = elMatVec_3 + _data_dst[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; } } + const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP; + const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP; + const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP; + const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP; + const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP; + const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP; + const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP; + const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP; + const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP; + const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP; + const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP; + const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP; + const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP; + const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP; + const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP; + const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP; + const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP; + const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP; + const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP; + const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP; + const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP; + const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP; + const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP; + const real_t tmp_coords_jac_12_GREEN_UP = jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP; + const real_t tmp_coords_jac_13_GREEN_UP = 1.0 / (tmp_coords_jac_12_GREEN_UP); + const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP); + const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP); + const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP); + const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP); + const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP); + const real_t abs_det_jac_affine_GREEN_UP = abs(tmp_coords_jac_12_GREEN_UP); { /* CellType.GREEN_UP */ - const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP; - const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP; - const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP; - const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP; - const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP; - const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP; - const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP; - const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP; - const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP; - const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP; - const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP; - const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP; - const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP; - const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP; - const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP; - const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP; - const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP; - const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP; - const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP; - const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP; - const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP; - const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP; - const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP; - const real_t tmp_coords_jac_12_GREEN_UP = 1.0 / (jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP); - const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP); - const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP); - const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP); - const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP); - const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; @@ -2467,7 +2571,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; const real_t tmp_qloop_30 = tmp_qloop_0*4.0; const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; - const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_32 = abs_det_jac_affine_GREEN_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; const real_t tmp_qloop_35 = jac_affine_inv_0_1_GREEN_UP*tmp_qloop_18 + jac_affine_inv_1_1_GREEN_UP*tmp_qloop_21 + jac_affine_inv_2_1_GREEN_UP*tmp_qloop_22; @@ -2747,70 +2851,90 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1] = elMatVec_3 + _data_dst[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; } } + const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN; + const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN; + const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN; + const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN; + const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN; + const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN; + const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; + const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; + const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; + const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; + const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; + const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; + const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN; + const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN; + const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN; + const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN; + const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN; + const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN; + const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN; + const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN; + const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN; + const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN; + const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN; + const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_15_GREEN_DOWN = jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN; + const real_t tmp_coords_jac_16_GREEN_DOWN = 1.0 / (tmp_coords_jac_15_GREEN_DOWN); + const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN); + const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN); + const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN); + const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); + const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); + const real_t abs_det_jac_affine_GREEN_DOWN = abs(tmp_coords_jac_15_GREEN_DOWN); { /* CellType.GREEN_DOWN */ - const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN; - const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN; - const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN; - const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN; - const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN; - const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN; - const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; - const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; - const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; - const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; - const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; - const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; - const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN; - const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN; - const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN; - const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN; - const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN; - const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN; - const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN; - const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN; - const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN; - const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN; - const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN; - const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN; - const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; - const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; - const real_t tmp_coords_jac_15_GREEN_DOWN = 1.0 / (jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN); - const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN); - const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN); - const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN); - const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); - const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t src_dof_0 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t src_dof_1 = _data_src_vertex_0[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t src_dof_2 = _data_src_vertex_0[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; @@ -3005,7 +3129,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::apply_macro_3D( real_t * RESTRICT const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; const real_t tmp_qloop_30 = tmp_qloop_0*4.0; const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; - const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_32 = abs_det_jac_affine_GREEN_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; const real_t tmp_qloop_35 = jac_affine_inv_0_1_GREEN_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_GREEN_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_GREEN_DOWN*tmp_qloop_22; diff --git a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_2D.cpp b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp similarity index 77% rename from operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_2D.cpp rename to operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp index 8d57d088..d1381740 100644 --- a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_2D.cpp +++ b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, idx_t * RESTRICT _data_src_edge_0, idx_t * RESTRICT _data_src_edge_1, idx_t * RESTRICT _data_src_vertex_0, idx_t * RESTRICT _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, idx_t * RESTRICT _data_src_edge_0, idx_t * RESTRICT _data_src_edge_1, idx_t * RESTRICT _data_src_vertex_0, idx_t * RESTRICT _data_src_vertex_1, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; @@ -67,33 +71,48 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_2D( idx_t * RESTRIC const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); { /* FaceType.GRAY */ - const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; - const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; - const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; - const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; - const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; - const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; - const real_t tmp_coords_jac_1_GRAY = 1.0 / (jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY); - const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_1_GRAY; - const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_1_GRAY; - const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_1_GRAY; - const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_1_GRAY; for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; @@ -155,7 +174,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_2D( idx_t * RESTRIC const real_t tmp_qloop_14 = -tmp_qloop_1 + tmp_qloop_4*-4.0 + tmp_qloop_7; const real_t tmp_qloop_15 = tmp_qloop_2*4.0; const real_t tmp_qloop_16 = tmp_qloop_0 - tmp_qloop_1 - tmp_qloop_15; - const real_t tmp_qloop_17 = 1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q]; + const real_t tmp_qloop_17 = abs_det_jac_affine_GRAY*1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q]; const real_t tmp_qloop_18 = tmp_qloop_17*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); const real_t tmp_qloop_19 = tmp_qloop_11*tmp_qloop_18; const real_t tmp_qloop_20 = jac_affine_inv_0_1_GRAY*tmp_qloop_9 + jac_affine_inv_1_1_GRAY*tmp_qloop_10; @@ -342,37 +361,52 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_2D( idx_t * RESTRIC mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); } } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); { /* FaceType.BLUE */ - const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; - const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; - const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; - const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; - const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; - const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; - const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; - const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; - const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; - const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; - const real_t tmp_coords_jac_5_BLUE = 1.0 / (jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE); - const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_5_BLUE; - const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_5_BLUE; - const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_5_BLUE; - const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_5_BLUE; for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; @@ -434,7 +468,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_2D( idx_t * RESTRIC const real_t tmp_qloop_14 = -tmp_qloop_1 + tmp_qloop_4*-4.0 + tmp_qloop_7; const real_t tmp_qloop_15 = tmp_qloop_2*4.0; const real_t tmp_qloop_16 = tmp_qloop_0 - tmp_qloop_1 - tmp_qloop_15; - const real_t tmp_qloop_17 = 1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q]; + const real_t tmp_qloop_17 = abs_det_jac_affine_BLUE*1.0 / (rho_dof_0*tmp_qloop_6 + rho_dof_1*tmp_qloop_12 + rho_dof_2*tmp_qloop_13 + rho_dof_3*tmp_qloop_1 + rho_dof_4*tmp_qloop_14 + rho_dof_5*tmp_qloop_16)*_data_q_w[q]; const real_t tmp_qloop_18 = tmp_qloop_17*(1.0 - _data_q_p_0[q] - _data_q_p_1[q]); const real_t tmp_qloop_19 = tmp_qloop_11*tmp_qloop_18; const real_t tmp_qloop_20 = jac_affine_inv_0_1_BLUE*tmp_qloop_9 + jac_affine_inv_1_1_BLUE*tmp_qloop_10; diff --git a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_3D.cpp b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp similarity index 81% rename from operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_3D.cpp rename to operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp index 0945e0d9..a624289c 100644 --- a/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_macro_3D.cpp +++ b/operators/grad_rho_by_rho_dot_u/noarch/P2VectorToP1ElementwiseGradRhoByRhoDotU_toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, idx_t * RESTRICT _data_src_edge_0, idx_t * RESTRICT _data_src_edge_1, idx_t * RESTRICT _data_src_edge_2, idx_t * RESTRICT _data_src_vertex_0, idx_t * RESTRICT _data_src_vertex_1, idx_t * RESTRICT _data_src_vertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_P2VectorToP1ElementwiseGradRhoByRhoDotU_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_rhoEdge, real_t * RESTRICT _data_rhoVertex, idx_t * RESTRICT _data_src_edge_0, idx_t * RESTRICT _data_src_edge_1, idx_t * RESTRICT _data_src_edge_2, idx_t * RESTRICT _data_src_vertex_0, idx_t * RESTRICT _data_src_vertex_1, idx_t * RESTRICT _data_src_vertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; @@ -69,61 +73,81 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC const real_t _data_q_p_2 [] = {0.25, 0.5, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; + const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2; + const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP; + const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP; + const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP; + const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP; + const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP; + const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP; + const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP; + const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP; + const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP; + const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP; + const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP; + const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP; + const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP; + const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP; + const real_t tmp_coords_jac_6_WHITE_UP = jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP; + const real_t tmp_coords_jac_7_WHITE_UP = 1.0 / (tmp_coords_jac_6_WHITE_UP); + const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP); + const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP); + const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP); + const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP); + const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP); + const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_7_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP); + const real_t abs_det_jac_affine_WHITE_UP = abs(tmp_coords_jac_6_WHITE_UP); { /* CellType.WHITE_UP */ - const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t p_affine_const_0_0_WHITE_UP = macro_vertex_coord_id_0comp0; - const real_t p_affine_const_0_1_WHITE_UP = macro_vertex_coord_id_0comp1; - const real_t p_affine_const_0_2_WHITE_UP = macro_vertex_coord_id_0comp2; - const real_t p_affine_const_1_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t p_affine_const_1_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t p_affine_const_1_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t p_affine_const_2_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t p_affine_const_2_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t p_affine_const_2_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t p_affine_const_3_0_WHITE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t p_affine_const_3_1_WHITE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t p_affine_const_3_2_WHITE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t jac_affine_0_0_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_1_0_WHITE_UP; - const real_t jac_affine_0_1_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_2_0_WHITE_UP; - const real_t jac_affine_0_2_WHITE_UP = -p_affine_const_0_0_WHITE_UP + p_affine_const_3_0_WHITE_UP; - const real_t jac_affine_1_0_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_1_1_WHITE_UP; - const real_t jac_affine_1_1_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_2_1_WHITE_UP; - const real_t tmp_coords_jac_5_WHITE_UP = jac_affine_0_2_WHITE_UP*jac_affine_1_1_WHITE_UP; - const real_t jac_affine_1_2_WHITE_UP = -p_affine_const_0_1_WHITE_UP + p_affine_const_3_1_WHITE_UP; - const real_t tmp_coords_jac_3_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_1_2_WHITE_UP; - const real_t jac_affine_2_0_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_1_2_WHITE_UP; - const real_t jac_affine_2_1_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_2_2_WHITE_UP; - const real_t tmp_coords_jac_2_WHITE_UP = jac_affine_1_2_WHITE_UP*jac_affine_2_1_WHITE_UP; - const real_t jac_affine_2_2_WHITE_UP = -p_affine_const_0_2_WHITE_UP + p_affine_const_3_2_WHITE_UP; - const real_t tmp_coords_jac_1_WHITE_UP = jac_affine_1_1_WHITE_UP*jac_affine_2_2_WHITE_UP; - const real_t tmp_coords_jac_4_WHITE_UP = jac_affine_0_1_WHITE_UP*jac_affine_2_2_WHITE_UP; - const real_t tmp_coords_jac_6_WHITE_UP = 1.0 / (jac_affine_0_0_WHITE_UP*tmp_coords_jac_1_WHITE_UP - jac_affine_0_0_WHITE_UP*tmp_coords_jac_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_0_WHITE_UP*tmp_coords_jac_4_WHITE_UP + jac_affine_2_0_WHITE_UP*tmp_coords_jac_3_WHITE_UP - jac_affine_2_0_WHITE_UP*tmp_coords_jac_5_WHITE_UP); - const real_t jac_affine_inv_0_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(tmp_coords_jac_1_WHITE_UP - tmp_coords_jac_2_WHITE_UP); - const real_t jac_affine_inv_0_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_2_WHITE_UP*jac_affine_2_1_WHITE_UP - tmp_coords_jac_4_WHITE_UP); - const real_t jac_affine_inv_0_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(tmp_coords_jac_3_WHITE_UP - tmp_coords_jac_5_WHITE_UP); - const real_t jac_affine_inv_1_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_1_0_WHITE_UP*jac_affine_2_2_WHITE_UP + jac_affine_1_2_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_1_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_2_2_WHITE_UP - jac_affine_0_2_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_1_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_1_2_WHITE_UP + jac_affine_0_2_WHITE_UP*jac_affine_1_0_WHITE_UP); - const real_t jac_affine_inv_2_0_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_1_0_WHITE_UP*jac_affine_2_1_WHITE_UP - jac_affine_1_1_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_2_1_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(-jac_affine_0_0_WHITE_UP*jac_affine_2_1_WHITE_UP + jac_affine_0_1_WHITE_UP*jac_affine_2_0_WHITE_UP); - const real_t jac_affine_inv_2_2_WHITE_UP = tmp_coords_jac_6_WHITE_UP*(jac_affine_0_0_WHITE_UP*jac_affine_1_1_WHITE_UP - jac_affine_0_1_WHITE_UP*jac_affine_1_0_WHITE_UP); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; @@ -288,7 +312,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; const real_t tmp_qloop_30 = tmp_qloop_0*4.0; const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; - const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_32 = abs_det_jac_affine_WHITE_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; const real_t tmp_qloop_35 = jac_affine_inv_0_1_WHITE_UP*tmp_qloop_18 + jac_affine_inv_1_1_WHITE_UP*tmp_qloop_21 + jac_affine_inv_2_1_WHITE_UP*tmp_qloop_22; @@ -847,73 +871,93 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); } } + const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; + const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; + const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; + const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN; + const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN; + const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN; + const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; + const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; + const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; + const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN; + const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN; + const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN; + const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN; + const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN; + const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN; + const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN; + const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN; + const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN; + const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN; + const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN; + const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN; + const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN; + const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN; + const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN; + const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN; + const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN; + const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; + const real_t tmp_coords_jac_18_WHITE_DOWN = jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN; + const real_t tmp_coords_jac_19_WHITE_DOWN = 1.0 / (tmp_coords_jac_18_WHITE_DOWN); + const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN); + const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN); + const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN); + const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); + const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); + const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_19_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); + const real_t abs_det_jac_affine_WHITE_DOWN = abs(tmp_coords_jac_18_WHITE_DOWN); { /* CellType.WHITE_DOWN */ - const real_t tmp_coords_jac_0_WHITE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_2_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_3_WHITE_DOWN = tmp_coords_jac_1_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; - const real_t tmp_coords_jac_4_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_5_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_6_WHITE_DOWN = tmp_coords_jac_4_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; - const real_t tmp_coords_jac_7_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t tmp_coords_jac_8_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_9_WHITE_DOWN = tmp_coords_jac_7_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; - const real_t tmp_coords_jac_10_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_11_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_12_WHITE_DOWN = tmp_coords_jac_0_WHITE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t p_affine_const_0_0_WHITE_DOWN = tmp_coords_jac_3_WHITE_DOWN; - const real_t p_affine_const_0_1_WHITE_DOWN = tmp_coords_jac_6_WHITE_DOWN; - const real_t p_affine_const_0_2_WHITE_DOWN = tmp_coords_jac_9_WHITE_DOWN; - const real_t p_affine_const_1_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_2_WHITE_DOWN; - const real_t p_affine_const_1_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_5_WHITE_DOWN; - const real_t p_affine_const_1_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_8_WHITE_DOWN; - const real_t p_affine_const_2_0_WHITE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_1_WHITE_DOWN; - const real_t p_affine_const_2_1_WHITE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_4_WHITE_DOWN; - const real_t p_affine_const_2_2_WHITE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_7_WHITE_DOWN; - const real_t p_affine_const_3_0_WHITE_DOWN = tmp_coords_jac_10_WHITE_DOWN + tmp_coords_jac_3_WHITE_DOWN; - const real_t p_affine_const_3_1_WHITE_DOWN = tmp_coords_jac_11_WHITE_DOWN + tmp_coords_jac_6_WHITE_DOWN; - const real_t p_affine_const_3_2_WHITE_DOWN = tmp_coords_jac_12_WHITE_DOWN + tmp_coords_jac_9_WHITE_DOWN; - const real_t jac_affine_0_0_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_1_0_WHITE_DOWN; - const real_t jac_affine_0_1_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_2_0_WHITE_DOWN; - const real_t jac_affine_0_2_WHITE_DOWN = -p_affine_const_0_0_WHITE_DOWN + p_affine_const_3_0_WHITE_DOWN; - const real_t jac_affine_1_0_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_1_1_WHITE_DOWN; - const real_t jac_affine_1_1_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_2_1_WHITE_DOWN; - const real_t tmp_coords_jac_17_WHITE_DOWN = jac_affine_0_2_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN; - const real_t jac_affine_1_2_WHITE_DOWN = -p_affine_const_0_1_WHITE_DOWN + p_affine_const_3_1_WHITE_DOWN; - const real_t tmp_coords_jac_15_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN; - const real_t jac_affine_2_0_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_1_2_WHITE_DOWN; - const real_t jac_affine_2_1_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_2_2_WHITE_DOWN; - const real_t tmp_coords_jac_14_WHITE_DOWN = jac_affine_1_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN; - const real_t jac_affine_2_2_WHITE_DOWN = -p_affine_const_0_2_WHITE_DOWN + p_affine_const_3_2_WHITE_DOWN; - const real_t tmp_coords_jac_13_WHITE_DOWN = jac_affine_1_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; - const real_t tmp_coords_jac_16_WHITE_DOWN = jac_affine_0_1_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN; - const real_t tmp_coords_jac_18_WHITE_DOWN = 1.0 / (jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_13_WHITE_DOWN - jac_affine_0_0_WHITE_DOWN*tmp_coords_jac_14_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_0_WHITE_DOWN*tmp_coords_jac_16_WHITE_DOWN + jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_15_WHITE_DOWN - jac_affine_2_0_WHITE_DOWN*tmp_coords_jac_17_WHITE_DOWN); - const real_t jac_affine_inv_0_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(tmp_coords_jac_13_WHITE_DOWN - tmp_coords_jac_14_WHITE_DOWN); - const real_t jac_affine_inv_0_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_2_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - tmp_coords_jac_16_WHITE_DOWN); - const real_t jac_affine_inv_0_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(tmp_coords_jac_15_WHITE_DOWN - tmp_coords_jac_17_WHITE_DOWN); - const real_t jac_affine_inv_1_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_1_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN + jac_affine_1_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_1_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_2_2_WHITE_DOWN - jac_affine_0_2_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_1_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_1_2_WHITE_DOWN + jac_affine_0_2_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); - const real_t jac_affine_inv_2_0_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_1_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN - jac_affine_1_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_2_1_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(-jac_affine_0_0_WHITE_DOWN*jac_affine_2_1_WHITE_DOWN + jac_affine_0_1_WHITE_DOWN*jac_affine_2_0_WHITE_DOWN); - const real_t jac_affine_inv_2_2_WHITE_DOWN = tmp_coords_jac_18_WHITE_DOWN*(jac_affine_0_0_WHITE_DOWN*jac_affine_1_1_WHITE_DOWN - jac_affine_0_1_WHITE_DOWN*jac_affine_1_0_WHITE_DOWN); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 2; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; @@ -1078,7 +1122,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; const real_t tmp_qloop_30 = tmp_qloop_0*4.0; const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; - const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_32 = abs_det_jac_affine_WHITE_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; const real_t tmp_qloop_35 = jac_affine_inv_0_1_WHITE_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_WHITE_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_WHITE_DOWN*tmp_qloop_22; @@ -1637,67 +1681,87 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); } } + const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP; + const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP; + const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP; + const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP; + const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP; + const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP; + const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP; + const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP; + const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP; + const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP; + const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP; + const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP; + const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP; + const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP; + const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP; + const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP; + const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP; + const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP; + const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP; + const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP; + const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP; + const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP; + const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP; + const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP; + const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP; + const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP; + const real_t tmp_coords_jac_12_BLUE_UP = jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP; + const real_t tmp_coords_jac_13_BLUE_UP = 1.0 / (tmp_coords_jac_12_BLUE_UP); + const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP); + const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP); + const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP); + const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP); + const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP); + const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_13_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP); + const real_t abs_det_jac_affine_BLUE_UP = abs(tmp_coords_jac_12_BLUE_UP); { /* CellType.BLUE_UP */ - const real_t tmp_coords_jac_0_BLUE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_2_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_3_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_4_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_5_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_6_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t p_affine_const_0_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP; - const real_t p_affine_const_0_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP; - const real_t p_affine_const_0_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP; - const real_t p_affine_const_1_0_BLUE_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_UP; - const real_t p_affine_const_1_1_BLUE_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_BLUE_UP; - const real_t p_affine_const_1_2_BLUE_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_BLUE_UP; - const real_t p_affine_const_2_0_BLUE_UP = tmp_coords_jac_1_BLUE_UP + tmp_coords_jac_4_BLUE_UP; - const real_t p_affine_const_2_1_BLUE_UP = tmp_coords_jac_2_BLUE_UP + tmp_coords_jac_5_BLUE_UP; - const real_t p_affine_const_2_2_BLUE_UP = tmp_coords_jac_3_BLUE_UP + tmp_coords_jac_6_BLUE_UP; - const real_t p_affine_const_3_0_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0) + tmp_coords_jac_1_BLUE_UP; - const real_t p_affine_const_3_1_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1) + tmp_coords_jac_2_BLUE_UP; - const real_t p_affine_const_3_2_BLUE_UP = tmp_coords_jac_0_BLUE_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2) + tmp_coords_jac_3_BLUE_UP; - const real_t jac_affine_0_0_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_1_0_BLUE_UP; - const real_t jac_affine_0_1_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_2_0_BLUE_UP; - const real_t jac_affine_0_2_BLUE_UP = -p_affine_const_0_0_BLUE_UP + p_affine_const_3_0_BLUE_UP; - const real_t jac_affine_1_0_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_1_1_BLUE_UP; - const real_t jac_affine_1_1_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_2_1_BLUE_UP; - const real_t tmp_coords_jac_11_BLUE_UP = jac_affine_0_2_BLUE_UP*jac_affine_1_1_BLUE_UP; - const real_t jac_affine_1_2_BLUE_UP = -p_affine_const_0_1_BLUE_UP + p_affine_const_3_1_BLUE_UP; - const real_t tmp_coords_jac_9_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_1_2_BLUE_UP; - const real_t jac_affine_2_0_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_1_2_BLUE_UP; - const real_t jac_affine_2_1_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_2_2_BLUE_UP; - const real_t tmp_coords_jac_8_BLUE_UP = jac_affine_1_2_BLUE_UP*jac_affine_2_1_BLUE_UP; - const real_t jac_affine_2_2_BLUE_UP = -p_affine_const_0_2_BLUE_UP + p_affine_const_3_2_BLUE_UP; - const real_t tmp_coords_jac_7_BLUE_UP = jac_affine_1_1_BLUE_UP*jac_affine_2_2_BLUE_UP; - const real_t tmp_coords_jac_10_BLUE_UP = jac_affine_0_1_BLUE_UP*jac_affine_2_2_BLUE_UP; - const real_t tmp_coords_jac_12_BLUE_UP = 1.0 / (jac_affine_0_0_BLUE_UP*tmp_coords_jac_7_BLUE_UP - jac_affine_0_0_BLUE_UP*tmp_coords_jac_8_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_0_BLUE_UP*tmp_coords_jac_10_BLUE_UP - jac_affine_2_0_BLUE_UP*tmp_coords_jac_11_BLUE_UP + jac_affine_2_0_BLUE_UP*tmp_coords_jac_9_BLUE_UP); - const real_t jac_affine_inv_0_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(tmp_coords_jac_7_BLUE_UP - tmp_coords_jac_8_BLUE_UP); - const real_t jac_affine_inv_0_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_2_BLUE_UP*jac_affine_2_1_BLUE_UP - tmp_coords_jac_10_BLUE_UP); - const real_t jac_affine_inv_0_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-tmp_coords_jac_11_BLUE_UP + tmp_coords_jac_9_BLUE_UP); - const real_t jac_affine_inv_1_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_1_0_BLUE_UP*jac_affine_2_2_BLUE_UP + jac_affine_1_2_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_1_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_2_2_BLUE_UP - jac_affine_0_2_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_1_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_1_2_BLUE_UP + jac_affine_0_2_BLUE_UP*jac_affine_1_0_BLUE_UP); - const real_t jac_affine_inv_2_0_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_1_0_BLUE_UP*jac_affine_2_1_BLUE_UP - jac_affine_1_1_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_2_1_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(-jac_affine_0_0_BLUE_UP*jac_affine_2_1_BLUE_UP + jac_affine_0_1_BLUE_UP*jac_affine_2_0_BLUE_UP); - const real_t jac_affine_inv_2_2_BLUE_UP = tmp_coords_jac_12_BLUE_UP*(jac_affine_0_0_BLUE_UP*jac_affine_1_1_BLUE_UP - jac_affine_0_1_BLUE_UP*jac_affine_1_0_BLUE_UP); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; @@ -1862,7 +1926,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; const real_t tmp_qloop_30 = tmp_qloop_0*4.0; const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; - const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_32 = abs_det_jac_affine_BLUE_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; const real_t tmp_qloop_35 = jac_affine_inv_0_1_BLUE_UP*tmp_qloop_18 + jac_affine_inv_1_1_BLUE_UP*tmp_qloop_21 + jac_affine_inv_2_1_BLUE_UP*tmp_qloop_22; @@ -2421,70 +2485,90 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); } } + const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN; + const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN; + const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN; + const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN; + const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN; + const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN; + const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN; + const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN; + const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN; + const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN; + const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN; + const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN; + const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN; + const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN; + const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN; + const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN; + const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN; + const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN; + const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN; + const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN; + const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN; + const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN; + const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN; + const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN; + const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN; + const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN; + const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; + const real_t tmp_coords_jac_15_BLUE_DOWN = jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN; + const real_t tmp_coords_jac_16_BLUE_DOWN = 1.0 / (tmp_coords_jac_15_BLUE_DOWN); + const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN); + const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN); + const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN); + const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); + const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); + const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_16_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); + const real_t abs_det_jac_affine_BLUE_DOWN = abs(tmp_coords_jac_15_BLUE_DOWN); { /* CellType.BLUE_DOWN */ - const real_t tmp_coords_jac_0_BLUE_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_2_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_3_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t tmp_coords_jac_4_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_5_BLUE_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_BLUE_DOWN; - const real_t tmp_coords_jac_6_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_7_BLUE_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_6_BLUE_DOWN; - const real_t tmp_coords_jac_8_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t tmp_coords_jac_9_BLUE_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_8_BLUE_DOWN; - const real_t p_affine_const_0_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN; - const real_t p_affine_const_0_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN; - const real_t p_affine_const_0_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN; - const real_t p_affine_const_1_0_BLUE_DOWN = tmp_coords_jac_5_BLUE_DOWN; - const real_t p_affine_const_1_1_BLUE_DOWN = tmp_coords_jac_7_BLUE_DOWN; - const real_t p_affine_const_1_2_BLUE_DOWN = tmp_coords_jac_9_BLUE_DOWN; - const real_t p_affine_const_2_0_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0) + tmp_coords_jac_5_BLUE_DOWN; - const real_t p_affine_const_2_1_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1) + tmp_coords_jac_7_BLUE_DOWN; - const real_t p_affine_const_2_2_BLUE_DOWN = tmp_coords_jac_0_BLUE_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2) + tmp_coords_jac_9_BLUE_DOWN; - const real_t p_affine_const_3_0_BLUE_DOWN = tmp_coords_jac_1_BLUE_DOWN + tmp_coords_jac_4_BLUE_DOWN; - const real_t p_affine_const_3_1_BLUE_DOWN = tmp_coords_jac_2_BLUE_DOWN + tmp_coords_jac_6_BLUE_DOWN; - const real_t p_affine_const_3_2_BLUE_DOWN = tmp_coords_jac_3_BLUE_DOWN + tmp_coords_jac_8_BLUE_DOWN; - const real_t jac_affine_0_0_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_1_0_BLUE_DOWN; - const real_t jac_affine_0_1_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_2_0_BLUE_DOWN; - const real_t jac_affine_0_2_BLUE_DOWN = -p_affine_const_0_0_BLUE_DOWN + p_affine_const_3_0_BLUE_DOWN; - const real_t jac_affine_1_0_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_1_1_BLUE_DOWN; - const real_t jac_affine_1_1_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_2_1_BLUE_DOWN; - const real_t tmp_coords_jac_14_BLUE_DOWN = jac_affine_0_2_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN; - const real_t jac_affine_1_2_BLUE_DOWN = -p_affine_const_0_1_BLUE_DOWN + p_affine_const_3_1_BLUE_DOWN; - const real_t tmp_coords_jac_12_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN; - const real_t jac_affine_2_0_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_1_2_BLUE_DOWN; - const real_t jac_affine_2_1_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_2_2_BLUE_DOWN; - const real_t tmp_coords_jac_11_BLUE_DOWN = jac_affine_1_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN; - const real_t jac_affine_2_2_BLUE_DOWN = -p_affine_const_0_2_BLUE_DOWN + p_affine_const_3_2_BLUE_DOWN; - const real_t tmp_coords_jac_10_BLUE_DOWN = jac_affine_1_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; - const real_t tmp_coords_jac_13_BLUE_DOWN = jac_affine_0_1_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN; - const real_t tmp_coords_jac_15_BLUE_DOWN = 1.0 / (jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_10_BLUE_DOWN - jac_affine_0_0_BLUE_DOWN*tmp_coords_jac_11_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_0_BLUE_DOWN*tmp_coords_jac_13_BLUE_DOWN + jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_12_BLUE_DOWN - jac_affine_2_0_BLUE_DOWN*tmp_coords_jac_14_BLUE_DOWN); - const real_t jac_affine_inv_0_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(tmp_coords_jac_10_BLUE_DOWN - tmp_coords_jac_11_BLUE_DOWN); - const real_t jac_affine_inv_0_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_2_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - tmp_coords_jac_13_BLUE_DOWN); - const real_t jac_affine_inv_0_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(tmp_coords_jac_12_BLUE_DOWN - tmp_coords_jac_14_BLUE_DOWN); - const real_t jac_affine_inv_1_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_1_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN + jac_affine_1_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_1_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_2_2_BLUE_DOWN - jac_affine_0_2_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_1_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_1_2_BLUE_DOWN + jac_affine_0_2_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); - const real_t jac_affine_inv_2_0_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_1_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN - jac_affine_1_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_2_1_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(-jac_affine_0_0_BLUE_DOWN*jac_affine_2_1_BLUE_DOWN + jac_affine_0_1_BLUE_DOWN*jac_affine_2_0_BLUE_DOWN); - const real_t jac_affine_inv_2_2_BLUE_DOWN = tmp_coords_jac_15_BLUE_DOWN*(jac_affine_0_0_BLUE_DOWN*jac_affine_1_1_BLUE_DOWN - jac_affine_0_1_BLUE_DOWN*jac_affine_1_0_BLUE_DOWN); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; @@ -2649,7 +2733,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; const real_t tmp_qloop_30 = tmp_qloop_0*4.0; const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; - const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_32 = abs_det_jac_affine_BLUE_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; const real_t tmp_qloop_35 = jac_affine_inv_0_1_BLUE_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_BLUE_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_BLUE_DOWN*tmp_qloop_22; @@ -3208,67 +3292,87 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); } } + const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP; + const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP; + const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP; + const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP; + const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP; + const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP; + const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP; + const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP; + const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP; + const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP; + const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP; + const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP; + const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP; + const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP; + const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP; + const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP; + const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP; + const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP; + const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP; + const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP; + const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP; + const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP; + const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP; + const real_t tmp_coords_jac_12_GREEN_UP = jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP; + const real_t tmp_coords_jac_13_GREEN_UP = 1.0 / (tmp_coords_jac_12_GREEN_UP); + const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP); + const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP); + const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP); + const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP); + const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP); + const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_13_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP); + const real_t abs_det_jac_affine_GREEN_UP = abs(tmp_coords_jac_12_GREEN_UP); { /* CellType.GREEN_UP */ - const real_t tmp_coords_jac_0_GREEN_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_2_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_3_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_4_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_5_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_6_GREEN_UP = tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t p_affine_const_0_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP; - const real_t p_affine_const_0_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP; - const real_t p_affine_const_0_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP; - const real_t p_affine_const_1_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t p_affine_const_1_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t p_affine_const_1_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_UP*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t p_affine_const_2_0_GREEN_UP = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_UP; - const real_t p_affine_const_2_1_GREEN_UP = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_UP; - const real_t p_affine_const_2_2_GREEN_UP = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_UP; - const real_t p_affine_const_3_0_GREEN_UP = tmp_coords_jac_1_GREEN_UP + tmp_coords_jac_4_GREEN_UP; - const real_t p_affine_const_3_1_GREEN_UP = tmp_coords_jac_2_GREEN_UP + tmp_coords_jac_5_GREEN_UP; - const real_t p_affine_const_3_2_GREEN_UP = tmp_coords_jac_3_GREEN_UP + tmp_coords_jac_6_GREEN_UP; - const real_t jac_affine_0_0_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_1_0_GREEN_UP; - const real_t jac_affine_0_1_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_2_0_GREEN_UP; - const real_t jac_affine_0_2_GREEN_UP = -p_affine_const_0_0_GREEN_UP + p_affine_const_3_0_GREEN_UP; - const real_t jac_affine_1_0_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_1_1_GREEN_UP; - const real_t jac_affine_1_1_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_2_1_GREEN_UP; - const real_t tmp_coords_jac_11_GREEN_UP = jac_affine_0_2_GREEN_UP*jac_affine_1_1_GREEN_UP; - const real_t jac_affine_1_2_GREEN_UP = -p_affine_const_0_1_GREEN_UP + p_affine_const_3_1_GREEN_UP; - const real_t tmp_coords_jac_9_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_1_2_GREEN_UP; - const real_t jac_affine_2_0_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_1_2_GREEN_UP; - const real_t jac_affine_2_1_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_2_2_GREEN_UP; - const real_t tmp_coords_jac_8_GREEN_UP = jac_affine_1_2_GREEN_UP*jac_affine_2_1_GREEN_UP; - const real_t jac_affine_2_2_GREEN_UP = -p_affine_const_0_2_GREEN_UP + p_affine_const_3_2_GREEN_UP; - const real_t tmp_coords_jac_7_GREEN_UP = jac_affine_1_1_GREEN_UP*jac_affine_2_2_GREEN_UP; - const real_t tmp_coords_jac_10_GREEN_UP = jac_affine_0_1_GREEN_UP*jac_affine_2_2_GREEN_UP; - const real_t tmp_coords_jac_12_GREEN_UP = 1.0 / (jac_affine_0_0_GREEN_UP*tmp_coords_jac_7_GREEN_UP - jac_affine_0_0_GREEN_UP*tmp_coords_jac_8_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_0_GREEN_UP*tmp_coords_jac_10_GREEN_UP - jac_affine_2_0_GREEN_UP*tmp_coords_jac_11_GREEN_UP + jac_affine_2_0_GREEN_UP*tmp_coords_jac_9_GREEN_UP); - const real_t jac_affine_inv_0_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(tmp_coords_jac_7_GREEN_UP - tmp_coords_jac_8_GREEN_UP); - const real_t jac_affine_inv_0_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_2_GREEN_UP*jac_affine_2_1_GREEN_UP - tmp_coords_jac_10_GREEN_UP); - const real_t jac_affine_inv_0_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-tmp_coords_jac_11_GREEN_UP + tmp_coords_jac_9_GREEN_UP); - const real_t jac_affine_inv_1_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_1_0_GREEN_UP*jac_affine_2_2_GREEN_UP + jac_affine_1_2_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_1_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_2_2_GREEN_UP - jac_affine_0_2_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_1_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_1_2_GREEN_UP + jac_affine_0_2_GREEN_UP*jac_affine_1_0_GREEN_UP); - const real_t jac_affine_inv_2_0_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_1_0_GREEN_UP*jac_affine_2_1_GREEN_UP - jac_affine_1_1_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_2_1_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(-jac_affine_0_0_GREEN_UP*jac_affine_2_1_GREEN_UP + jac_affine_0_1_GREEN_UP*jac_affine_2_0_GREEN_UP); - const real_t jac_affine_inv_2_2_GREEN_UP = tmp_coords_jac_12_GREEN_UP*(jac_affine_0_0_GREEN_UP*jac_affine_1_1_GREEN_UP - jac_affine_0_1_GREEN_UP*jac_affine_1_0_GREEN_UP); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6))]; @@ -3433,7 +3537,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; const real_t tmp_qloop_30 = tmp_qloop_0*4.0; const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; - const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_32 = abs_det_jac_affine_GREEN_UP*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; const real_t tmp_qloop_35 = jac_affine_inv_0_1_GREEN_UP*tmp_qloop_18 + jac_affine_inv_1_1_GREEN_UP*tmp_qloop_21 + jac_affine_inv_2_1_GREEN_UP*tmp_qloop_22; @@ -3992,70 +4096,90 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); } } + const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); + const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); + const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); + const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); + const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); + const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN; + const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN; + const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN; + const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN; + const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN; + const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN; + const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; + const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; + const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; + const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; + const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; + const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; + const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN; + const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN; + const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN; + const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN; + const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN; + const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN; + const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN; + const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN; + const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN; + const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN; + const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN; + const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; + const real_t tmp_coords_jac_15_GREEN_DOWN = jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN; + const real_t tmp_coords_jac_16_GREEN_DOWN = 1.0 / (tmp_coords_jac_15_GREEN_DOWN); + const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN); + const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN); + const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN); + const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); + const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); + const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_16_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); + const real_t abs_det_jac_affine_GREEN_DOWN = abs(tmp_coords_jac_15_GREEN_DOWN); { /* CellType.GREEN_DOWN */ - const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; - const real_t tmp_coords_jac_1_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); - const real_t tmp_coords_jac_2_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); - const real_t tmp_coords_jac_3_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2); - const real_t tmp_coords_jac_4_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); - const real_t tmp_coords_jac_5_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); - const real_t tmp_coords_jac_6_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2); - const real_t tmp_coords_jac_7_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0); - const real_t tmp_coords_jac_8_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1); - const real_t tmp_coords_jac_9_GREEN_DOWN = tmp_coords_jac_0_GREEN_DOWN*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2); - const real_t p_affine_const_0_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN; - const real_t p_affine_const_0_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN; - const real_t p_affine_const_0_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN; - const real_t p_affine_const_1_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_4_GREEN_DOWN; - const real_t p_affine_const_1_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_5_GREEN_DOWN; - const real_t p_affine_const_1_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_6_GREEN_DOWN; - const real_t p_affine_const_2_0_GREEN_DOWN = macro_vertex_coord_id_0comp0 + tmp_coords_jac_4_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; - const real_t p_affine_const_2_1_GREEN_DOWN = macro_vertex_coord_id_0comp1 + tmp_coords_jac_5_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; - const real_t p_affine_const_2_2_GREEN_DOWN = macro_vertex_coord_id_0comp2 + tmp_coords_jac_6_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; - const real_t p_affine_const_3_0_GREEN_DOWN = tmp_coords_jac_1_GREEN_DOWN + tmp_coords_jac_7_GREEN_DOWN; - const real_t p_affine_const_3_1_GREEN_DOWN = tmp_coords_jac_2_GREEN_DOWN + tmp_coords_jac_8_GREEN_DOWN; - const real_t p_affine_const_3_2_GREEN_DOWN = tmp_coords_jac_3_GREEN_DOWN + tmp_coords_jac_9_GREEN_DOWN; - const real_t jac_affine_0_0_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_1_0_GREEN_DOWN; - const real_t jac_affine_0_1_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_2_0_GREEN_DOWN; - const real_t jac_affine_0_2_GREEN_DOWN = -p_affine_const_0_0_GREEN_DOWN + p_affine_const_3_0_GREEN_DOWN; - const real_t jac_affine_1_0_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_1_1_GREEN_DOWN; - const real_t jac_affine_1_1_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_2_1_GREEN_DOWN; - const real_t tmp_coords_jac_14_GREEN_DOWN = jac_affine_0_2_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN; - const real_t jac_affine_1_2_GREEN_DOWN = -p_affine_const_0_1_GREEN_DOWN + p_affine_const_3_1_GREEN_DOWN; - const real_t tmp_coords_jac_12_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN; - const real_t jac_affine_2_0_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_1_2_GREEN_DOWN; - const real_t jac_affine_2_1_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_2_2_GREEN_DOWN; - const real_t tmp_coords_jac_11_GREEN_DOWN = jac_affine_1_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN; - const real_t jac_affine_2_2_GREEN_DOWN = -p_affine_const_0_2_GREEN_DOWN + p_affine_const_3_2_GREEN_DOWN; - const real_t tmp_coords_jac_10_GREEN_DOWN = jac_affine_1_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; - const real_t tmp_coords_jac_13_GREEN_DOWN = jac_affine_0_1_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN; - const real_t tmp_coords_jac_15_GREEN_DOWN = 1.0 / (jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_10_GREEN_DOWN - jac_affine_0_0_GREEN_DOWN*tmp_coords_jac_11_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_0_GREEN_DOWN*tmp_coords_jac_13_GREEN_DOWN + jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_12_GREEN_DOWN - jac_affine_2_0_GREEN_DOWN*tmp_coords_jac_14_GREEN_DOWN); - const real_t jac_affine_inv_0_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(tmp_coords_jac_10_GREEN_DOWN - tmp_coords_jac_11_GREEN_DOWN); - const real_t jac_affine_inv_0_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_2_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - tmp_coords_jac_13_GREEN_DOWN); - const real_t jac_affine_inv_0_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(tmp_coords_jac_12_GREEN_DOWN - tmp_coords_jac_14_GREEN_DOWN); - const real_t jac_affine_inv_1_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_1_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN + jac_affine_1_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_1_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_2_2_GREEN_DOWN - jac_affine_0_2_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_1_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_1_2_GREEN_DOWN + jac_affine_0_2_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); - const real_t jac_affine_inv_2_0_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_1_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN - jac_affine_1_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_2_1_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(-jac_affine_0_0_GREEN_DOWN*jac_affine_2_1_GREEN_DOWN + jac_affine_0_1_GREEN_DOWN*jac_affine_2_0_GREEN_DOWN); - const real_t jac_affine_inv_2_2_GREEN_DOWN = tmp_coords_jac_15_GREEN_DOWN*(jac_affine_0_0_GREEN_DOWN*jac_affine_1_1_GREEN_DOWN - jac_affine_0_1_GREEN_DOWN*jac_affine_1_0_GREEN_DOWN); for (int64_t ctr_2 = 0; ctr_2 < micro_edges_per_macro_edge; ctr_2 += 1) for (int64_t ctr_1 = 0; ctr_1 < -ctr_2 + micro_edges_per_macro_edge; ctr_1 += 1) for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 - ctr_2 + micro_edges_per_macro_edge - 1; ctr_0 += 1) { - const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2)); - const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2)); - const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*((real_t)(ctr_2 + 1)); - const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*((real_t)(ctr_0)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*((real_t)(ctr_1 + 1)) + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*((real_t)(ctr_2 + 1)); + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + real_t _data_float_loop_ctr_array_dim_2[4]; + _data_float_loop_ctr_array_dim_2[0] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[1] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[2] = (real_t) ctr_2; + _data_float_loop_ctr_array_dim_2[3] = (real_t) ctr_2; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*1.0*_data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_3comp0)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_3comp1)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_3_2 = macro_vertex_coord_id_0comp2 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_1comp2)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_2comp2)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp2 + macro_vertex_coord_id_3comp2)*(1.0 + _data_float_loop_ctr_array_dim_2[ctr_0 - phantom_ctr_0])*1.0; const real_t rho_dof_0 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6))]; const real_t rho_dof_1 = _data_rhoVertex[ctr_0 + (ctr_1 + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) - (((-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)*(-ctr_2 + micro_edges_per_macro_edge + 3)) / (6)) + 1]; const real_t rho_dof_2 = _data_rhoVertex[ctr_0 + ctr_1*(-ctr_2 + micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) - (((-ctr_2 + micro_edges_per_macro_edge)*(-ctr_2 + micro_edges_per_macro_edge + 1)*(-ctr_2 + micro_edges_per_macro_edge + 2)) / (6)) + (((micro_edges_per_macro_edge + 1)*(micro_edges_per_macro_edge + 2)*(micro_edges_per_macro_edge + 3)) / (6)) + 1]; @@ -4220,7 +4344,7 @@ void P2VectorToP1ElementwiseGradRhoByRhoDotU::toMatrix_macro_3D( idx_t * RESTRIC const real_t tmp_qloop_29 = -tmp_qloop_28 + tmp_qloop_6 - tmp_qloop_7 - tmp_qloop_9; const real_t tmp_qloop_30 = tmp_qloop_0*4.0; const real_t tmp_qloop_31 = -tmp_qloop_11 - tmp_qloop_30 + tmp_qloop_8; - const real_t tmp_qloop_32 = 1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; + const real_t tmp_qloop_32 = abs_det_jac_affine_GREEN_DOWN*1.0 / (rho_dof_0*tmp_qloop_12 + rho_dof_1*tmp_qloop_24 + rho_dof_2*tmp_qloop_25 + rho_dof_3*tmp_qloop_26 + rho_dof_4*tmp_qloop_7 + rho_dof_5*tmp_qloop_10 + rho_dof_6*tmp_qloop_9 + rho_dof_7*tmp_qloop_27 + rho_dof_8*tmp_qloop_29 + rho_dof_9*tmp_qloop_31)*_data_q_w[q]; const real_t tmp_qloop_33 = tmp_qloop_32*(1.0 - _data_q_p_0[q] - _data_q_p_1[q] - _data_q_p_2[q]); const real_t tmp_qloop_34 = tmp_qloop_23*tmp_qloop_33; const real_t tmp_qloop_35 = jac_affine_inv_0_1_GREEN_DOWN*tmp_qloop_18 + jac_affine_inv_1_1_GREEN_DOWN*tmp_qloop_21 + jac_affine_inv_2_1_GREEN_DOWN*tmp_qloop_22; diff --git a/operators/gradient/CMakeLists.txt b/operators/gradient/CMakeLists.txt index 0c37a89a..6d3cdf34 100644 --- a/operators/gradient/CMakeLists.txt +++ b/operators/gradient/CMakeLists.txt @@ -21,40 +21,40 @@ add_library( opgen-gradient if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY) target_sources(opgen-gradient PRIVATE - avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp - avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp - avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp - avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp - avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp - avx/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp - avx/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp - avx/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp - avx/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp - avx/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp - noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_macro_2D.cpp - noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_macro_2D.cpp - noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp - noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp - noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp - noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_2D.cpp - noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_3D.cpp - noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_2D.cpp - noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_3D.cpp - noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_macro_3D.cpp + avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp + avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp + avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp + avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp + avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp + avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp + avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp + avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp + avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp + avx/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp + noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp + noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp + noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp + noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp ) set_source_files_properties( - avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp - avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp - avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp - avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp - avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp - avx/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp - avx/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp - avx/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp - avx/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp - avx/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp + avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp + avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp + avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp + avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp + avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp + avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp + avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp + avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp + avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp + avx/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS} ) @@ -65,26 +65,26 @@ else() target_sources(opgen-gradient PRIVATE - noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp - noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_macro_2D.cpp - noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp - noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_macro_2D.cpp - noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp - noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp - noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp - noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp - noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp - noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp - noarch/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp - noarch/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp - noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_2D.cpp - noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_3D.cpp - noarch/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp - noarch/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp - noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_2D.cpp - noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_3D.cpp - noarch/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp - noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_macro_3D.cpp + noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp + noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp + noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp + noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp + noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp + noarch/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp + noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp + noarch/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp + noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp + noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp ) endif() diff --git a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.cpp b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.cpp index c7b6e5e1..b124e41b 100644 --- a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.cpp +++ b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.cpp @@ -144,7 +144,7 @@ void P1ToP2ElementwiseGradientAnnulusMap_0_0::apply( const P1Function< real_t >& this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -165,6 +165,7 @@ void P1ToP2ElementwiseGradientAnnulusMap_0_0::apply( const P1Function< real_t >& refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -243,7 +244,7 @@ void P1ToP2ElementwiseGradientAnnulusMap_0_0::toMatrix( const std::shared_ptr< S this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -265,6 +266,7 @@ void P1ToP2ElementwiseGradientAnnulusMap_0_0::toMatrix( const std::shared_ptr< S refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.hpp b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.hpp index d822b1ad..5518ed90 100644 --- a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.hpp +++ b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_0_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -77,57 +79,66 @@ class P1ToP2ElementwiseGradientAnnulusMap_0_0 : public Operator< P1Function< rea protected: private: - /// Kernel type: apply + /// Integral: P1ToP2ElementwiseGradientAnnulusMap_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 220 330 17 12 3 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P1ToP2ElementwiseGradientAnnulusMap_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 202 312 17 12 3 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void toMatrix_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; }; } // namespace operatorgeneration diff --git a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.cpp b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.cpp index e1ee2e39..08305061 100644 --- a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.cpp +++ b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.cpp @@ -144,7 +144,7 @@ void P1ToP2ElementwiseGradientAnnulusMap_1_0::apply( const P1Function< real_t >& this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -165,6 +165,7 @@ void P1ToP2ElementwiseGradientAnnulusMap_1_0::apply( const P1Function< real_t >& refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -243,7 +244,7 @@ void P1ToP2ElementwiseGradientAnnulusMap_1_0::toMatrix( const std::shared_ptr< S this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -265,6 +266,7 @@ void P1ToP2ElementwiseGradientAnnulusMap_1_0::toMatrix( const std::shared_ptr< S refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.hpp b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.hpp index 1c03c47c..eeb840ed 100644 --- a/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.hpp +++ b/operators/gradient/P1ToP2ElementwiseGradientAnnulusMap_1_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -77,57 +79,66 @@ class P1ToP2ElementwiseGradientAnnulusMap_1_0 : public Operator< P1Function< rea protected: private: - /// Kernel type: apply + /// Integral: P1ToP2ElementwiseGradientAnnulusMap_1_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 220 330 17 12 3 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P1ToP2ElementwiseGradientAnnulusMap_1_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 202 312 17 12 3 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void toMatrix_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; }; } // namespace operatorgeneration diff --git a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0.cpp b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0.cpp index 53aed4cf..d4d125c3 100644 --- a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0.cpp +++ b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0.cpp @@ -146,7 +146,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::apply( const P1Function< this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -179,6 +179,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::apply( const P1Function< thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -269,7 +270,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::toMatrix( const std::shar this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -303,6 +304,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::toMatrix( const std::shar thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0.hpp b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0.hpp index 78819b72..16490ad8 100644 --- a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0.hpp +++ b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -77,81 +79,90 @@ class P1ToP2ElementwiseGradientIcosahedralShellMap_0_0 : public Operator< P1Func protected: private: - /// Kernel type: apply + /// Integral: P1ToP2ElementwiseGradientIcosahedralShellMap_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 540 783 46 4 4 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_src, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_src, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P1ToP2ElementwiseGradientIcosahedralShellMap_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 500 743 46 4 4 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_src, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_src, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; }; } // namespace operatorgeneration diff --git a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0.cpp b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0.cpp index 54ec309d..37126655 100644 --- a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0.cpp +++ b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0.cpp @@ -146,7 +146,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::apply( const P1Function< this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -179,6 +179,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::apply( const P1Function< thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -269,7 +270,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::toMatrix( const std::shar this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -303,6 +304,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::toMatrix( const std::shar thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0.hpp b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0.hpp index 561a431e..7be898ba 100644 --- a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0.hpp +++ b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -77,81 +79,90 @@ class P1ToP2ElementwiseGradientIcosahedralShellMap_1_0 : public Operator< P1Func protected: private: - /// Kernel type: apply + /// Integral: P1ToP2ElementwiseGradientIcosahedralShellMap_1_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 540 783 46 4 4 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_src, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_src, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P1ToP2ElementwiseGradientIcosahedralShellMap_1_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 500 743 46 4 4 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_src, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_src, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; }; } // namespace operatorgeneration diff --git a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0.cpp b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0.cpp index 095574db..16dad771 100644 --- a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0.cpp +++ b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0.cpp @@ -146,7 +146,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::apply( const P1Function< this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -179,6 +179,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::apply( const P1Function< thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -269,7 +270,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::toMatrix( const std::shar this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -303,6 +304,7 @@ void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::toMatrix( const std::shar thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0.hpp b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0.hpp index 10e81880..6d0336ae 100644 --- a/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0.hpp +++ b/operators/gradient/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -77,81 +79,90 @@ class P1ToP2ElementwiseGradientIcosahedralShellMap_2_0 : public Operator< P1Func protected: private: - /// Kernel type: apply + /// Integral: P1ToP2ElementwiseGradientIcosahedralShellMap_2_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 540 783 46 4 4 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_src, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_src, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P1ToP2ElementwiseGradientIcosahedralShellMap_2_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 500 743 46 4 4 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_src, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_src, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; }; } // namespace operatorgeneration diff --git a/operators/gradient/P1ToP2ElementwiseGradient_0_0.cpp b/operators/gradient/P1ToP2ElementwiseGradient_0_0.cpp index afce6952..481eb1dd 100644 --- a/operators/gradient/P1ToP2ElementwiseGradient_0_0.cpp +++ b/operators/gradient/P1ToP2ElementwiseGradient_0_0.cpp @@ -128,7 +128,7 @@ void P1ToP2ElementwiseGradient_0_0::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P1ToP2ElementwiseGradient_0_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -147,6 +147,7 @@ void P1ToP2ElementwiseGradient_0_0::apply( const P1Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -213,7 +214,7 @@ void P1ToP2ElementwiseGradient_0_0::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P1ToP2ElementwiseGradient_0_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -226,6 +227,7 @@ void P1ToP2ElementwiseGradient_0_0::apply( const P1Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -291,7 +293,7 @@ void P1ToP2ElementwiseGradient_0_0::toMatrix( const std::shared_ptr< SparseMatri this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P1ToP2ElementwiseGradient_0_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -311,6 +313,7 @@ void P1ToP2ElementwiseGradient_0_0::toMatrix( const std::shared_ptr< SparseMatri mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -340,7 +343,7 @@ void P1ToP2ElementwiseGradient_0_0::toMatrix( const std::shared_ptr< SparseMatri this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P1ToP2ElementwiseGradient_0_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -354,6 +357,7 @@ void P1ToP2ElementwiseGradient_0_0::toMatrix( const std::shared_ptr< SparseMatri mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/gradient/P1ToP2ElementwiseGradient_0_0.hpp b/operators/gradient/P1ToP2ElementwiseGradient_0_0.hpp index b7dee5e8..f0c11fa6 100644 --- a/operators/gradient/P1ToP2ElementwiseGradient_0_0.hpp +++ b/operators/gradient/P1ToP2ElementwiseGradient_0_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -74,88 +76,107 @@ class P1ToP2ElementwiseGradient_0_0 : public Operator< P1Function< real_t >, P2F protected: private: - /// Kernel type: apply + /// Integral: P1ToP2ElementwiseGradient_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 116 144 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P1ToP2ElementwiseGradient_0_0_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ToP2ElementwiseGradient_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 345 380 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P1ToP2ElementwiseGradient_0_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ToP2ElementwiseGradient_0_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 98 126 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P1ToP2ElementwiseGradient_0_0_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ToP2ElementwiseGradient_0_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 305 340 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P1ToP2ElementwiseGradient_0_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; }; } // namespace operatorgeneration diff --git a/operators/gradient/P1ToP2ElementwiseGradient_1_0.cpp b/operators/gradient/P1ToP2ElementwiseGradient_1_0.cpp index 9f65cb11..c60d6176 100644 --- a/operators/gradient/P1ToP2ElementwiseGradient_1_0.cpp +++ b/operators/gradient/P1ToP2ElementwiseGradient_1_0.cpp @@ -128,7 +128,7 @@ void P1ToP2ElementwiseGradient_1_0::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P1ToP2ElementwiseGradient_1_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -147,6 +147,7 @@ void P1ToP2ElementwiseGradient_1_0::apply( const P1Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -213,7 +214,7 @@ void P1ToP2ElementwiseGradient_1_0::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P1ToP2ElementwiseGradient_1_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -226,6 +227,7 @@ void P1ToP2ElementwiseGradient_1_0::apply( const P1Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -291,7 +293,7 @@ void P1ToP2ElementwiseGradient_1_0::toMatrix( const std::shared_ptr< SparseMatri this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P1ToP2ElementwiseGradient_1_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -311,6 +313,7 @@ void P1ToP2ElementwiseGradient_1_0::toMatrix( const std::shared_ptr< SparseMatri mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -340,7 +343,7 @@ void P1ToP2ElementwiseGradient_1_0::toMatrix( const std::shared_ptr< SparseMatri this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P1ToP2ElementwiseGradient_1_0_macro_2D( _data_dstEdge, _data_dstVertex, @@ -354,6 +357,7 @@ void P1ToP2ElementwiseGradient_1_0::toMatrix( const std::shared_ptr< SparseMatri mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/gradient/P1ToP2ElementwiseGradient_1_0.hpp b/operators/gradient/P1ToP2ElementwiseGradient_1_0.hpp index ee0af312..8f976754 100644 --- a/operators/gradient/P1ToP2ElementwiseGradient_1_0.hpp +++ b/operators/gradient/P1ToP2ElementwiseGradient_1_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -74,88 +76,107 @@ class P1ToP2ElementwiseGradient_1_0 : public Operator< P1Function< real_t >, P2F protected: private: - /// Kernel type: apply + /// Integral: P1ToP2ElementwiseGradient_1_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 116 144 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P1ToP2ElementwiseGradient_1_0_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ToP2ElementwiseGradient_1_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 345 380 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P1ToP2ElementwiseGradient_1_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ToP2ElementwiseGradient_1_0 + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 98 126 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P1ToP2ElementwiseGradient_1_0_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ToP2ElementwiseGradient_1_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 305 340 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P1ToP2ElementwiseGradient_1_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; }; } // namespace operatorgeneration diff --git a/operators/gradient/P1ToP2ElementwiseGradient_2_0.cpp b/operators/gradient/P1ToP2ElementwiseGradient_2_0.cpp index ffa952cc..80d01f0c 100644 --- a/operators/gradient/P1ToP2ElementwiseGradient_2_0.cpp +++ b/operators/gradient/P1ToP2ElementwiseGradient_2_0.cpp @@ -128,7 +128,7 @@ void P1ToP2ElementwiseGradient_2_0::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P1ToP2ElementwiseGradient_2_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -147,6 +147,7 @@ void P1ToP2ElementwiseGradient_2_0::apply( const P1Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -220,7 +221,7 @@ void P1ToP2ElementwiseGradient_2_0::toMatrix( const std::shared_ptr< SparseMatri this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P1ToP2ElementwiseGradient_2_0_macro_3D( _data_dstEdge, _data_dstVertex, @@ -240,6 +241,7 @@ void P1ToP2ElementwiseGradient_2_0::toMatrix( const std::shared_ptr< SparseMatri mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/gradient/P1ToP2ElementwiseGradient_2_0.hpp b/operators/gradient/P1ToP2ElementwiseGradient_2_0.hpp index 7f518871..810b24c2 100644 --- a/operators/gradient/P1ToP2ElementwiseGradient_2_0.hpp +++ b/operators/gradient/P1ToP2ElementwiseGradient_2_0.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -74,53 +76,62 @@ class P1ToP2ElementwiseGradient_2_0 : public Operator< P1Function< real_t >, P2F protected: private: - /// Kernel type: apply + /// Integral: P1ToP2ElementwiseGradient_2_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 345 380 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P1ToP2ElementwiseGradient_2_0_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ToP2ElementwiseGradient_2_0 + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: CUBES /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 305 340 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P1ToP2ElementwiseGradient_2_0_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; }; } // namespace operatorgeneration diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp similarity index 99% rename from operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp rename to operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp index 56fce1cf..68d512a9 100644 --- a/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp +++ b/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P1ToP2ElementwiseGradientAnnulusMap_0_0::apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp similarity index 99% rename from operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp rename to operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp index 47d6b55f..ac6c2dca 100644 --- a/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp +++ b/operators/gradient/avx/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientAnnulusMap_1_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P1ToP2ElementwiseGradientAnnulusMap_1_0::apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp rename to operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp index 657796e4..f08adf58 100644 --- a/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp +++ b/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp similarity index 99% rename from operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp rename to operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp index d94aa200..2b92e6b7 100644 --- a/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp +++ b/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp similarity index 99% rename from operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp rename to operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp index 808dafe1..f0e17ec3 100644 --- a/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp +++ b/operators/gradient/avx/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp similarity index 99% rename from operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp rename to operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp index 2cea1726..0b4dbee5 100644 --- a/operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp +++ b/operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_0_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_0_0::apply_P1ToP2ElementwiseGradient_0_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp similarity index 99% rename from operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp rename to operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp index 38b98cf7..25e43913 100644 --- a/operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp +++ b/operators/gradient/avx/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_0_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_0_0::apply_P1ToP2ElementwiseGradient_0_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp similarity index 99% rename from operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp rename to operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp index e446fff4..1eb2891d 100644 --- a/operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp +++ b/operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_1_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_1_0::apply_P1ToP2ElementwiseGradient_1_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp similarity index 99% rename from operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp rename to operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp index 3225debc..cef4dfa4 100644 --- a/operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp +++ b/operators/gradient/avx/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_1_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_1_0::apply_P1ToP2ElementwiseGradient_1_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/avx/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp b/operators/gradient/avx/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp similarity index 99% rename from operators/gradient/avx/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp rename to operators/gradient/avx/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp index 25c56d96..042040bb 100644 --- a/operators/gradient/avx/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp +++ b/operators/gradient/avx/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_2_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_2_0::apply_P1ToP2ElementwiseGradient_2_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp similarity index 98% rename from operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp index b4312145..f935d21a 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_macro_2D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientAnnulusMap_0_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P1ToP2ElementwiseGradientAnnulusMap_0_0::apply_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp similarity index 98% rename from operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_macro_2D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp index 28bd7239..9588c7cd 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_macro_2D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_0_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientAnnulusMap_0_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P1ToP2ElementwiseGradientAnnulusMap_0_0::toMatrix_P1ToP2ElementwiseGradientAnnulusMap_0_0_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp similarity index 98% rename from operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp index 60b23b46..6288dadc 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_macro_2D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientAnnulusMap_1_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P1ToP2ElementwiseGradientAnnulusMap_1_0::apply_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp similarity index 98% rename from operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_macro_2D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp index ce869eef..9d12e824 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_macro_2D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientAnnulusMap_1_0_toMatrix_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientAnnulusMap_1_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P1ToP2ElementwiseGradientAnnulusMap_1_0::toMatrix_P1ToP2ElementwiseGradientAnnulusMap_1_0_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp index d61d2c99..2eab9008 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_macro_3D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::apply_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp similarity index 99% rename from operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp index 41e1786a..254485aa 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_macro_3D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ToP2ElementwiseGradientIcosahedralShellMap_0_0::toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_0_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp similarity index 99% rename from operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp index 14ec97f8..5b3a5aa4 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_macro_3D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::apply_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp similarity index 99% rename from operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp index 1cc81709..3b0b1eb0 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_macro_3D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ToP2ElementwiseGradientIcosahedralShellMap_1_0::toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_1_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp similarity index 99% rename from operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp index d45cc629..a0a11ee9 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_macro_3D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::apply_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp similarity index 99% rename from operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp index 980a97ef..3f146ad5 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_macro_3D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ToP2ElementwiseGradientIcosahedralShellMap_2_0::toMatrix_P1ToP2ElementwiseGradientIcosahedralShellMap_2_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t tmp_coords_jac_0_WHITE_UP = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp similarity index 98% rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp index d71bcbed..f2b2c2e1 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_macro_2D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_0_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_0_0::apply_P1ToP2ElementwiseGradient_0_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp similarity index 99% rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp index c67eeed4..1c9b0439 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_macro_3D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_apply_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_0_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_0_0::apply_P1ToP2ElementwiseGradient_0_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp similarity index 98% rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_2D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp index f2d8391d..e5020c45 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_2D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_0_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_0_0::toMatrix_P1ToP2ElementwiseGradient_0_0_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp similarity index 99% rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_3D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp index 068e344b..32469538 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_macro_3D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_0_0_toMatrix_P1ToP2ElementwiseGradient_0_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_0_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_0_0::toMatrix_P1ToP2ElementwiseGradient_0_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp similarity index 98% rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp index 0589a1df..1049fd8b 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_macro_2D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_1_0::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_1_0::apply_P1ToP2ElementwiseGradient_1_0_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp similarity index 99% rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp index 410c95a8..83284af5 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_macro_3D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_apply_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_1_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_1_0::apply_P1ToP2ElementwiseGradient_1_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_2D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp similarity index 98% rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_2D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp index ce8b5c70..e934ca93 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_2D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_1_0::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_1_0::toMatrix_P1ToP2ElementwiseGradient_1_0_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp similarity index 99% rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_3D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp index 657f8d54..ba0b7ad9 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_macro_3D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_1_0_toMatrix_P1ToP2ElementwiseGradient_1_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_1_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_1_0::toMatrix_P1ToP2ElementwiseGradient_1_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp similarity index 99% rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp index ea2712d2..4f0c7b74 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_apply_macro_3D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_apply_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_2_0::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_2_0::apply_P1ToP2ElementwiseGradient_2_0_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_macro_3D.cpp b/operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp similarity index 99% rename from operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_macro_3D.cpp rename to operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp index 658ef807..cba14647 100644 --- a/operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_macro_3D.cpp +++ b/operators/gradient/noarch/P1ToP2ElementwiseGradient_2_0_toMatrix_P1ToP2ElementwiseGradient_2_0_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ToP2ElementwiseGradient_2_0::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ToP2ElementwiseGradient_2_0::toMatrix_P1ToP2ElementwiseGradient_2_0_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t tmp_coords_jac_0_GREEN_DOWN = 1.0 / (micro_edges_per_macro_edge_float)*1.0; diff --git a/operators/k_mass/CMakeLists.txt b/operators/k_mass/CMakeLists.txt index 71f23f38..51d608a7 100644 --- a/operators/k_mass/CMakeLists.txt +++ b/operators/k_mass/CMakeLists.txt @@ -23,62 +23,62 @@ add_library( opgen-k_mass if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY) target_sources(opgen-k_mass PRIVATE - avx/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp - avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp - avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P1ElementwiseKMass_apply_macro_2D.cpp - avx/P1ElementwiseKMass_apply_macro_3D.cpp - avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp - avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp - avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseKMass_apply_macro_2D.cpp - avx/P2ElementwiseKMass_apply_macro_3D.cpp - avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp - avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp - avx/P2ToP1ElementwiseKMass_apply_macro_2D.cpp - avx/P2ToP1ElementwiseKMass_apply_macro_3D.cpp - noarch/P1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp - noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P1ElementwiseKMass_toMatrix_macro_2D.cpp - noarch/P1ElementwiseKMass_toMatrix_macro_3D.cpp - noarch/P2ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp - noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P2ElementwiseKMass_toMatrix_macro_2D.cpp - noarch/P2ElementwiseKMass_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp - noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseKMass_toMatrix_macro_2D.cpp - noarch/P2ToP1ElementwiseKMass_toMatrix_macro_3D.cpp + avx/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp + avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp + avx/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp + avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp + avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp + avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp + avx/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp + avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp + avx/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp + avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp + avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp + avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp + avx/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp + avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp + avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp + noarch/P1ElementwiseKMassAnnulusMap_toMatrix_P1ElementwiseKMassAnnulusMap_macro_2D.cpp + noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_2D.cpp + noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_3D.cpp + noarch/P2ElementwiseKMassAnnulusMap_toMatrix_P2ElementwiseKMassAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_2D.cpp + noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_3D.cpp + noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp + noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_2D.cpp + noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_3D.cpp ) set_source_files_properties( - avx/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp - avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp - avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P1ElementwiseKMass_apply_macro_2D.cpp - avx/P1ElementwiseKMass_apply_macro_3D.cpp - avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp - avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp - avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseKMass_apply_macro_2D.cpp - avx/P2ElementwiseKMass_apply_macro_3D.cpp - avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp - avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp - avx/P2ToP1ElementwiseKMass_apply_macro_2D.cpp - avx/P2ToP1ElementwiseKMass_apply_macro_3D.cpp + avx/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp + avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp + avx/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp + avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp + avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp + avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp + avx/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp + avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp + avx/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp + avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp + avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp + avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp + avx/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp + avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp + avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS} ) @@ -89,38 +89,38 @@ else() target_sources(opgen-k_mass PRIVATE - noarch/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp - noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp - noarch/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp - noarch/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P1ElementwiseKMass_apply_macro_2D.cpp - noarch/P1ElementwiseKMass_apply_macro_3D.cpp - noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P1ElementwiseKMass_toMatrix_macro_2D.cpp - noarch/P1ElementwiseKMass_toMatrix_macro_3D.cpp - noarch/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp - noarch/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp - noarch/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp - noarch/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P2ElementwiseKMass_apply_macro_2D.cpp - noarch/P2ElementwiseKMass_apply_macro_3D.cpp - noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseKMass_toMatrix_macro_2D.cpp - noarch/P2ElementwiseKMass_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp - noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp - noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp - noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P2ToP1ElementwiseKMass_apply_macro_2D.cpp - noarch/P2ToP1ElementwiseKMass_apply_macro_3D.cpp - noarch/P2ToP1ElementwiseKMass_toMatrix_macro_2D.cpp - noarch/P2ToP1ElementwiseKMass_toMatrix_macro_3D.cpp + noarch/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp + noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp + noarch/P1ElementwiseKMassAnnulusMap_toMatrix_P1ElementwiseKMassAnnulusMap_macro_2D.cpp + noarch/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + noarch/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + noarch/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp + noarch/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp + noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp + noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp + noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_2D.cpp + noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_3D.cpp + noarch/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseKMassAnnulusMap_toMatrix_P2ElementwiseKMassAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp + noarch/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp + noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp + noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp + noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_2D.cpp + noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_3D.cpp + noarch/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp + noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp + noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp + noarch/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp + noarch/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp + noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_2D.cpp + noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_3D.cpp ) endif() diff --git a/operators/k_mass/P1ElementwiseKMass.cpp b/operators/k_mass/P1ElementwiseKMass.cpp index 03b70d41..a77a51b2 100644 --- a/operators/k_mass/P1ElementwiseKMass.cpp +++ b/operators/k_mass/P1ElementwiseKMass.cpp @@ -133,7 +133,7 @@ void P1ElementwiseKMass::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P1ElementwiseKMass_macro_3D( _data_dst, _data_k, @@ -152,6 +152,7 @@ void P1ElementwiseKMass::apply( const P1Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -200,7 +201,7 @@ void P1ElementwiseKMass::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P1ElementwiseKMass_macro_2D( _data_dst, _data_k, @@ -213,6 +214,7 @@ void P1ElementwiseKMass::apply( const P1Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -276,7 +278,7 @@ void P1ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& m this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P1ElementwiseKMass_macro_3D( _data_dst, _data_k, @@ -296,6 +298,7 @@ void P1ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& m mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -325,7 +328,7 @@ void P1ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& m this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P1ElementwiseKMass_macro_2D( _data_dst, _data_k, @@ -339,6 +342,7 @@ void P1ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& m mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -390,7 +394,7 @@ void P1ElementwiseKMass::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D( _data_invDiag_, _data_k, @@ -408,6 +412,7 @@ void P1ElementwiseKMass::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -447,7 +452,7 @@ void P1ElementwiseKMass::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D( _data_invDiag_, _data_k, @@ -459,6 +464,7 @@ void P1ElementwiseKMass::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/k_mass/P1ElementwiseKMass.hpp b/operators/k_mass/P1ElementwiseKMass.hpp index c79a7498..9252db2e 100644 --- a/operators/k_mass/P1ElementwiseKMass.hpp +++ b/operators/k_mass/P1ElementwiseKMass.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -82,126 +84,155 @@ class P1ElementwiseKMass : public Operator< P1Function< real_t >, P1Function< re protected: private: - /// Kernel type: apply + /// Integral: P1ElementwiseKMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 81 73 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_k, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P1ElementwiseKMass_macro_2D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_k, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseKMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 189 163 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_k, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P1ElementwiseKMass_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_k, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseKMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 72 64 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_k, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P1ElementwiseKMass_macro_2D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_k, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseKMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 173 147 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_k, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P1ElementwiseKMass_macro_3D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_k, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseKMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 63 52 12 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_, - real_t* RESTRICT _data_k, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D( real_t* RESTRICT _data_invDiag_, + real_t* RESTRICT _data_k, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseKMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 147 117 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_, - real_t* RESTRICT _data_k, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D( real_t* RESTRICT _data_invDiag_, + real_t* RESTRICT _data_k, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P1Function< real_t > > invDiag_; P1Function< real_t > k; diff --git a/operators/k_mass/P1ElementwiseKMassAnnulusMap.cpp b/operators/k_mass/P1ElementwiseKMassAnnulusMap.cpp index 68c62d7d..28e01722 100644 --- a/operators/k_mass/P1ElementwiseKMassAnnulusMap.cpp +++ b/operators/k_mass/P1ElementwiseKMassAnnulusMap.cpp @@ -135,7 +135,7 @@ void P1ElementwiseKMassAnnulusMap::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P1ElementwiseKMassAnnulusMap_macro_2D( _data_dst, _data_k, @@ -156,6 +156,7 @@ void P1ElementwiseKMassAnnulusMap::apply( const P1Function< real_t >& src, refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -232,7 +233,7 @@ void P1ElementwiseKMassAnnulusMap::toMatrix( const std::shared_ptr< SparseMatrix this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P1ElementwiseKMassAnnulusMap_macro_2D( _data_dst, _data_k, @@ -254,6 +255,7 @@ void P1ElementwiseKMassAnnulusMap::toMatrix( const std::shared_ptr< SparseMatrix refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } @@ -319,7 +321,7 @@ void P1ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D( _data_invDiag_, _data_k, @@ -339,6 +341,7 @@ void P1ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues() refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/k_mass/P1ElementwiseKMassAnnulusMap.hpp b/operators/k_mass/P1ElementwiseKMassAnnulusMap.hpp index e8412918..bf249e60 100644 --- a/operators/k_mass/P1ElementwiseKMassAnnulusMap.hpp +++ b/operators/k_mass/P1ElementwiseKMassAnnulusMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,81 +85,95 @@ class P1ElementwiseKMassAnnulusMap : public Operator< P1Function< real_t >, P1Fu protected: private: - /// Kernel type: apply + /// Integral: P1ElementwiseKMassAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 333 513 24 20 4 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_k, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P1ElementwiseKMassAnnulusMap_macro_2D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_k, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P1ElementwiseKMassAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 324 504 24 20 4 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_k, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P1ElementwiseKMassAnnulusMap_macro_2D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_k, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P1ElementwiseKMassAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 315 492 24 20 4 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_, - real_t* RESTRICT _data_k, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D( real_t* RESTRICT _data_invDiag_, + real_t* RESTRICT _data_k, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; std::shared_ptr< P1Function< real_t > > invDiag_; P1Function< real_t > k; diff --git a/operators/k_mass/P1ElementwiseKMassIcosahedralShellMap.cpp b/operators/k_mass/P1ElementwiseKMassIcosahedralShellMap.cpp index d9b48d04..3eeda4e4 100644 --- a/operators/k_mass/P1ElementwiseKMassIcosahedralShellMap.cpp +++ b/operators/k_mass/P1ElementwiseKMassIcosahedralShellMap.cpp @@ -149,7 +149,7 @@ void P1ElementwiseKMassIcosahedralShellMap::apply( const P1Function< real_t >& s this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D( _data_dst, _data_k, @@ -182,6 +182,7 @@ void P1ElementwiseKMassIcosahedralShellMap::apply( const P1Function< real_t >& s thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -267,7 +268,7 @@ void P1ElementwiseKMassIcosahedralShellMap::toMatrix( const std::shared_ptr< Spa this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P1ElementwiseKMassIcosahedralShellMap_macro_3D( _data_dst, _data_k, @@ -301,6 +302,7 @@ void P1ElementwiseKMassIcosahedralShellMap::toMatrix( const std::shared_ptr< Spa thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } @@ -377,7 +379,7 @@ void P1ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D( _data_invDiag_, _data_k, @@ -409,6 +411,7 @@ void P1ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/k_mass/P1ElementwiseKMassIcosahedralShellMap.hpp b/operators/k_mass/P1ElementwiseKMassIcosahedralShellMap.hpp index c501ad59..dead3823 100644 --- a/operators/k_mass/P1ElementwiseKMassIcosahedralShellMap.hpp +++ b/operators/k_mass/P1ElementwiseKMassIcosahedralShellMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,117 +85,132 @@ class P1ElementwiseKMassIcosahedralShellMap : public Operator< P1Function< real_ protected: private: - /// Kernel type: apply + /// Integral: P1ElementwiseKMassIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 904 1543 51 15 5 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_k, - real_t* RESTRICT _data_src, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_k, + real_t* RESTRICT _data_src, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P1ElementwiseKMassIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 888 1527 51 15 5 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_k, - idx_t* RESTRICT _data_src, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P1ElementwiseKMassIcosahedralShellMap_macro_3D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_k, + idx_t* RESTRICT _data_src, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P1ElementwiseKMassIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 862 1497 51 15 5 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_, - real_t* RESTRICT _data_k, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D( + real_t* RESTRICT _data_invDiag_, + real_t* RESTRICT _data_k, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; std::shared_ptr< P1Function< real_t > > invDiag_; P1Function< real_t > k; diff --git a/operators/k_mass/P2ElementwiseKMass.cpp b/operators/k_mass/P2ElementwiseKMass.cpp index dd4df2bc..46aa8ff2 100644 --- a/operators/k_mass/P2ElementwiseKMass.cpp +++ b/operators/k_mass/P2ElementwiseKMass.cpp @@ -137,7 +137,7 @@ void P2ElementwiseKMass::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseKMass_macro_3D( _data_dstEdge, _data_dstVertex, @@ -159,6 +159,7 @@ void P2ElementwiseKMass::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -228,7 +229,7 @@ void P2ElementwiseKMass::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseKMass_macro_2D( _data_dstEdge, _data_dstVertex, @@ -244,6 +245,7 @@ void P2ElementwiseKMass::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -314,7 +316,7 @@ void P2ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& m this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseKMass_macro_3D( _data_dstEdge, _data_dstVertex, @@ -337,6 +339,7 @@ void P2ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& m mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -369,7 +372,7 @@ void P2ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& m this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseKMass_macro_2D( _data_dstEdge, _data_dstVertex, @@ -386,6 +389,7 @@ void P2ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& m mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -440,7 +444,7 @@ void P2ElementwiseKMass::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -460,6 +464,7 @@ void P2ElementwiseKMass::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -504,7 +509,7 @@ void P2ElementwiseKMass::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -518,6 +523,7 @@ void P2ElementwiseKMass::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/k_mass/P2ElementwiseKMass.hpp b/operators/k_mass/P2ElementwiseKMass.hpp index 9de04491..89469870 100644 --- a/operators/k_mass/P2ElementwiseKMass.hpp +++ b/operators/k_mass/P2ElementwiseKMass.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -82,142 +84,171 @@ class P2ElementwiseKMass : public Operator< P2Function< real_t >, P2Function< re protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseKMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 290 300 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ElementwiseKMass_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseKMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1117 1118 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseKMass_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseKMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 254 264 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ElementwiseKMass_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseKMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1017 1018 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseKMass_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseKMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 170 174 12 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseKMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 532 523 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > k; diff --git a/operators/k_mass/P2ElementwiseKMassAnnulusMap.cpp b/operators/k_mass/P2ElementwiseKMassAnnulusMap.cpp index dac8d5ff..d5318c89 100644 --- a/operators/k_mass/P2ElementwiseKMassAnnulusMap.cpp +++ b/operators/k_mass/P2ElementwiseKMassAnnulusMap.cpp @@ -149,7 +149,7 @@ void P2ElementwiseKMassAnnulusMap::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseKMassAnnulusMap_macro_2D( _data_dstEdge, _data_dstVertex, @@ -173,6 +173,7 @@ void P2ElementwiseKMassAnnulusMap::apply( const P2Function< real_t >& src, refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -256,7 +257,7 @@ void P2ElementwiseKMassAnnulusMap::toMatrix( const std::shared_ptr< SparseMatrix this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseKMassAnnulusMap_macro_2D( _data_dstEdge, _data_dstVertex, @@ -281,6 +282,7 @@ void P2ElementwiseKMassAnnulusMap::toMatrix( const std::shared_ptr< SparseMatrix refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } @@ -349,7 +351,7 @@ void P2ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -371,6 +373,7 @@ void P2ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues() refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/k_mass/P2ElementwiseKMassAnnulusMap.hpp b/operators/k_mass/P2ElementwiseKMassAnnulusMap.hpp index 49ab3356..1964dced 100644 --- a/operators/k_mass/P2ElementwiseKMassAnnulusMap.hpp +++ b/operators/k_mass/P2ElementwiseKMassAnnulusMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,89 +85,103 @@ class P2ElementwiseKMassAnnulusMap : public Operator< P2Function< real_t >, P2Fu protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseKMassAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 668 960 30 30 6 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseKMassAnnulusMap_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseKMassAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 632 924 30 30 6 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseKMassAnnulusMap_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseKMassAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 548 834 30 30 6 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > k; diff --git a/operators/k_mass/P2ElementwiseKMassIcosahedralShellMap.cpp b/operators/k_mass/P2ElementwiseKMassIcosahedralShellMap.cpp index 929c124e..6dfcc2e2 100644 --- a/operators/k_mass/P2ElementwiseKMassIcosahedralShellMap.cpp +++ b/operators/k_mass/P2ElementwiseKMassIcosahedralShellMap.cpp @@ -153,7 +153,7 @@ void P2ElementwiseKMassIcosahedralShellMap::apply( const P2Function< real_t >& s this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D( _data_dstEdge, _data_dstVertex, @@ -189,6 +189,7 @@ void P2ElementwiseKMassIcosahedralShellMap::apply( const P2Function< real_t >& s thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -284,7 +285,7 @@ void P2ElementwiseKMassIcosahedralShellMap::toMatrix( const std::shared_ptr< Spa this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseKMassIcosahedralShellMap_macro_3D( _data_dstEdge, _data_dstVertex, @@ -321,6 +322,7 @@ void P2ElementwiseKMassIcosahedralShellMap::toMatrix( const std::shared_ptr< Spa thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } @@ -400,7 +402,7 @@ void P2ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -434,6 +436,7 @@ void P2ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/k_mass/P2ElementwiseKMassIcosahedralShellMap.hpp b/operators/k_mass/P2ElementwiseKMassIcosahedralShellMap.hpp index 00dddcfe..8dfcbe8a 100644 --- a/operators/k_mass/P2ElementwiseKMassIcosahedralShellMap.hpp +++ b/operators/k_mass/P2ElementwiseKMassIcosahedralShellMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -83,125 +85,140 @@ class P2ElementwiseKMassIcosahedralShellMap : public Operator< P2Function< real_ protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseKMassIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2690 4154 69 33 11 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseKMassIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2590 4054 69 33 11 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseKMassIcosahedralShellMap_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseKMassIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2105 3559 69 33 11 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D( + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > k; diff --git a/operators/k_mass/P2ToP1ElementwiseKMass.cpp b/operators/k_mass/P2ToP1ElementwiseKMass.cpp index f5cfd0f3..f445349c 100644 --- a/operators/k_mass/P2ToP1ElementwiseKMass.cpp +++ b/operators/k_mass/P2ToP1ElementwiseKMass.cpp @@ -135,7 +135,7 @@ void P2ToP1ElementwiseKMass::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ToP1ElementwiseKMass_macro_3D( _data_dst, _data_kEdge, @@ -156,6 +156,7 @@ void P2ToP1ElementwiseKMass::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -206,7 +207,7 @@ void P2ToP1ElementwiseKMass::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ToP1ElementwiseKMass_macro_2D( _data_dst, _data_kEdge, @@ -221,6 +222,7 @@ void P2ToP1ElementwiseKMass::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -286,7 +288,7 @@ void P2ToP1ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ToP1ElementwiseKMass_macro_3D( _data_dst, _data_kEdge, @@ -308,6 +310,7 @@ void P2ToP1ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -339,7 +342,7 @@ void P2ToP1ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ToP1ElementwiseKMass_macro_2D( _data_dst, _data_kEdge, @@ -355,6 +358,7 @@ void P2ToP1ElementwiseKMass::toMatrix( const std::shared_ptr< SparseMatrixProxy mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/k_mass/P2ToP1ElementwiseKMass.hpp b/operators/k_mass/P2ToP1ElementwiseKMass.hpp index b30f8512..aa6a35f2 100644 --- a/operators/k_mass/P2ToP1ElementwiseKMass.hpp +++ b/operators/k_mass/P2ToP1ElementwiseKMass.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -77,96 +79,115 @@ class P2ToP1ElementwiseKMass : public Operator< P2Function< real_t >, P1Function protected: private: - /// Kernel type: apply + /// Integral: P2ToP1ElementwiseKMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 254 264 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ToP1ElementwiseKMass_macro_2D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ToP1ElementwiseKMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 892 871 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ToP1ElementwiseKMass_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ToP1ElementwiseKMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 236 246 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ToP1ElementwiseKMass_macro_2D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ToP1ElementwiseKMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 852 831 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void toMatrix_P2ToP1ElementwiseKMass_macro_3D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; P2Function< real_t > k; }; diff --git a/operators/k_mass/P2ToP1ElementwiseKMassAnnulusMap.cpp b/operators/k_mass/P2ToP1ElementwiseKMassAnnulusMap.cpp index d9170035..a250d3cb 100644 --- a/operators/k_mass/P2ToP1ElementwiseKMassAnnulusMap.cpp +++ b/operators/k_mass/P2ToP1ElementwiseKMassAnnulusMap.cpp @@ -137,7 +137,7 @@ void P2ToP1ElementwiseKMassAnnulusMap::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D( _data_dst, _data_kEdge, @@ -160,6 +160,7 @@ void P2ToP1ElementwiseKMassAnnulusMap::apply( const P2Function< real_t >& src, refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -238,7 +239,7 @@ void P2ToP1ElementwiseKMassAnnulusMap::toMatrix( const std::shared_ptr< SparseMa this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ToP1ElementwiseKMassAnnulusMap_macro_2D( _data_dst, _data_kEdge, @@ -262,6 +263,7 @@ void P2ToP1ElementwiseKMassAnnulusMap::toMatrix( const std::shared_ptr< SparseMa refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/k_mass/P2ToP1ElementwiseKMassAnnulusMap.hpp b/operators/k_mass/P2ToP1ElementwiseKMassAnnulusMap.hpp index 8d34dcff..3222cf1b 100644 --- a/operators/k_mass/P2ToP1ElementwiseKMassAnnulusMap.hpp +++ b/operators/k_mass/P2ToP1ElementwiseKMassAnnulusMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -78,61 +80,70 @@ class P2ToP1ElementwiseKMassAnnulusMap : public Operator< P2Function< real_t >, protected: private: - /// Kernel type: apply + /// Integral: P2ToP1ElementwiseKMassAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 632 924 30 30 6 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ToP1ElementwiseKMassAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 614 906 30 30 6 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void toMatrix_P2ToP1ElementwiseKMassAnnulusMap_macro_2D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; P2Function< real_t > k; }; diff --git a/operators/k_mass/P2ToP1ElementwiseKMassIcosahedralShellMap.cpp b/operators/k_mass/P2ToP1ElementwiseKMassIcosahedralShellMap.cpp index 027f78ad..ecb5c90d 100644 --- a/operators/k_mass/P2ToP1ElementwiseKMassIcosahedralShellMap.cpp +++ b/operators/k_mass/P2ToP1ElementwiseKMassIcosahedralShellMap.cpp @@ -152,7 +152,7 @@ void P2ToP1ElementwiseKMassIcosahedralShellMap::apply( const P2Function< real_t this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D( _data_dst, _data_kEdge, @@ -187,6 +187,7 @@ void P2ToP1ElementwiseKMassIcosahedralShellMap::apply( const P2Function< real_t thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -274,7 +275,7 @@ void P2ToP1ElementwiseKMassIcosahedralShellMap::toMatrix( const std::shared_ptr< this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D( _data_dst, _data_kEdge, @@ -310,6 +311,7 @@ void P2ToP1ElementwiseKMassIcosahedralShellMap::toMatrix( const std::shared_ptr< thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } diff --git a/operators/k_mass/P2ToP1ElementwiseKMassIcosahedralShellMap.hpp b/operators/k_mass/P2ToP1ElementwiseKMassIcosahedralShellMap.hpp index 7b5a9afd..3bac6117 100644 --- a/operators/k_mass/P2ToP1ElementwiseKMassIcosahedralShellMap.hpp +++ b/operators/k_mass/P2ToP1ElementwiseKMassIcosahedralShellMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/p2functionspace/P2Function.hpp" #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -78,85 +80,94 @@ class P2ToP1ElementwiseKMassIcosahedralShellMap : public Operator< P2Function< r protected: private: - /// Kernel type: apply + /// Integral: P2ToP1ElementwiseKMassIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2465 3907 69 33 11 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ToP1ElementwiseKMassIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2425 3867 69 33 11 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - real_t* RESTRICT _data_kEdge, - real_t* RESTRICT _data_kVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void toMatrix_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D( idx_t* RESTRICT _data_dst, + real_t* RESTRICT _data_kEdge, + real_t* RESTRICT _data_kVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; P2Function< real_t > k; }; diff --git a/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp b/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp rename to operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp index c776dd6f..81ee593a 100644 --- a/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp +++ b/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMassAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P1ElementwiseKMassAnnulusMap::apply_P1ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp index cf1f1cf6..e5b33a9b 100644 --- a/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/k_mass/avx/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P1ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp b/operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp rename to operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp index 7b8617a3..af403e77 100644 --- a/operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMassIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ElementwiseKMassIcosahedralShellMap::apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp index 6089632c..d4047efb 100644 --- a/operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/k_mass/avx/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/k_mass/avx/P1ElementwiseKMass_apply_macro_2D.cpp b/operators/k_mass/avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp similarity index 99% rename from operators/k_mass/avx/P1ElementwiseKMass_apply_macro_2D.cpp rename to operators/k_mass/avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp index 0a8b0754..3b788b47 100644 --- a/operators/k_mass/avx/P1ElementwiseKMass_apply_macro_2D.cpp +++ b/operators/k_mass/avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMass::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseKMass::apply_P1ElementwiseKMass_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/k_mass/avx/P1ElementwiseKMass_apply_macro_3D.cpp b/operators/k_mass/avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp similarity index 99% rename from operators/k_mass/avx/P1ElementwiseKMass_apply_macro_3D.cpp rename to operators/k_mass/avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp index 5ccb6846..8960390a 100644 --- a/operators/k_mass/avx/P1ElementwiseKMass_apply_macro_3D.cpp +++ b/operators/k_mass/avx/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMass::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseKMass::apply_P1ElementwiseKMass_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp similarity index 99% rename from operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp index 745e3a50..41ce9ee4 100644 --- a/operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMass::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseKMass::computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp similarity index 99% rename from operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp index 91886fb4..4a8be4ff 100644 --- a/operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/k_mass/avx/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMass::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseKMass::computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp b/operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp rename to operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp index c8eeaf5a..e2a9f03d 100644 --- a/operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp +++ b/operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMassAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseKMassAnnulusMap::apply_P2ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp index 6e61a798..433b3949 100644 --- a/operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/k_mass/avx/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp b/operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp rename to operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp index 0603eaa3..0e20b6d5 100644 --- a/operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMassIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseKMassIcosahedralShellMap::apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp index 0d225d70..49cfdad7 100644 --- a/operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/k_mass/avx/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/avx/P2ElementwiseKMass_apply_macro_2D.cpp b/operators/k_mass/avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp similarity index 99% rename from operators/k_mass/avx/P2ElementwiseKMass_apply_macro_2D.cpp rename to operators/k_mass/avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp index 0895583a..798e2085 100644 --- a/operators/k_mass/avx/P2ElementwiseKMass_apply_macro_2D.cpp +++ b/operators/k_mass/avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMass::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseKMass::apply_P2ElementwiseKMass_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/avx/P2ElementwiseKMass_apply_macro_3D.cpp b/operators/k_mass/avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp similarity index 99% rename from operators/k_mass/avx/P2ElementwiseKMass_apply_macro_3D.cpp rename to operators/k_mass/avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp index 77716215..a8bd6e36 100644 --- a/operators/k_mass/avx/P2ElementwiseKMass_apply_macro_3D.cpp +++ b/operators/k_mass/avx/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMass::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseKMass::apply_P2ElementwiseKMass_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp similarity index 99% rename from operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp index f6367d97..3a559920 100644 --- a/operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMass::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseKMass::computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp similarity index 99% rename from operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp index b32f4fb2..a64442b1 100644 --- a/operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/k_mass/avx/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMass::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseKMass::computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/avx/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp b/operators/k_mass/avx/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/k_mass/avx/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp rename to operators/k_mass/avx/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp index 5d10e3c8..04ffa667 100644 --- a/operators/k_mass/avx/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp +++ b/operators/k_mass/avx/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseKMassAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ToP1ElementwiseKMassAnnulusMap::apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp b/operators/k_mass/avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/k_mass/avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp rename to operators/k_mass/avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp index 207aa641..3058fce8 100644 --- a/operators/k_mass/avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/k_mass/avx/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseKMassIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ToP1ElementwiseKMassIcosahedralShellMap::apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_macro_2D.cpp b/operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp similarity index 99% rename from operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_macro_2D.cpp rename to operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp index 6e3c69b5..9a4caa03 100644 --- a/operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_macro_2D.cpp +++ b/operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseKMass::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseKMass::apply_P2ToP1ElementwiseKMass_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_macro_3D.cpp b/operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp similarity index 99% rename from operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_macro_3D.cpp rename to operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp index 05f61d82..7f4b5024 100644 --- a/operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_macro_3D.cpp +++ b/operators/k_mass/avx/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseKMass::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseKMass::apply_P2ToP1ElementwiseKMass_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp b/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp rename to operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp index 1dfd53fc..71e9d2d2 100644 --- a/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp +++ b/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_apply_P1ElementwiseKMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMassAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P1ElementwiseKMassAnnulusMap::apply_P1ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp index 807eedc8..d4cc3e55 100644 --- a/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P1ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues_P1ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp b/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_toMatrix_P1ElementwiseKMassAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp rename to operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_toMatrix_P1ElementwiseKMassAnnulusMap_macro_2D.cpp index e016a2bb..ae546804 100644 --- a/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp +++ b/operators/k_mass/noarch/P1ElementwiseKMassAnnulusMap_toMatrix_P1ElementwiseKMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMassAnnulusMap::toMatrix_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P1ElementwiseKMassAnnulusMap::toMatrix_P1ElementwiseKMassAnnulusMap_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp b/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp rename to operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp index ad55c60e..47749c53 100644 --- a/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMassIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ElementwiseKMassIcosahedralShellMap::apply_P1ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp index e0db2013..823f67ed 100644 --- a/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_P1ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp b/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp rename to operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp index be0908de..9120fc34 100644 --- a/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp +++ b/operators/k_mass/noarch/P1ElementwiseKMassIcosahedralShellMap_toMatrix_P1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMassIcosahedralShellMap::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, idx_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P1ElementwiseKMassIcosahedralShellMap::toMatrix_P1ElementwiseKMassIcosahedralShellMap_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, idx_t * RESTRICT _data_src, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/k_mass/noarch/P1ElementwiseKMass_apply_macro_2D.cpp b/operators/k_mass/noarch/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp similarity index 97% rename from operators/k_mass/noarch/P1ElementwiseKMass_apply_macro_2D.cpp rename to operators/k_mass/noarch/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp index f07d06ea..1686847a 100644 --- a/operators/k_mass/noarch/P1ElementwiseKMass_apply_macro_2D.cpp +++ b/operators/k_mass/noarch/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMass::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseKMass::apply_P1ElementwiseKMass_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/k_mass/noarch/P1ElementwiseKMass_apply_macro_3D.cpp b/operators/k_mass/noarch/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P1ElementwiseKMass_apply_macro_3D.cpp rename to operators/k_mass/noarch/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp index 0f03bea4..f5ad2b07 100644 --- a/operators/k_mass/noarch/P1ElementwiseKMass_apply_macro_3D.cpp +++ b/operators/k_mass/noarch/P1ElementwiseKMass_apply_P1ElementwiseKMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMass::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseKMass::apply_P1ElementwiseKMass_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp similarity index 97% rename from operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp index afe18e1f..1d9ff5ca 100644 --- a/operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMass::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseKMass::computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_2D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp index 8ca88be9..8cabb2ec 100644 --- a/operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/k_mass/noarch/P1ElementwiseKMass_computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMass::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseKMass::computeInverseDiagonalOperatorValues_P1ElementwiseKMass_macro_3D( real_t * RESTRICT _data_invDiag_, real_t * RESTRICT _data_k, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_macro_2D.cpp b/operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_2D.cpp similarity index 97% rename from operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_macro_2D.cpp rename to operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_2D.cpp index 8085859a..6f7dab01 100644 --- a/operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_macro_2D.cpp +++ b/operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMass::toMatrix_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseKMass::toMatrix_P1ElementwiseKMass_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_macro_3D.cpp b/operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_macro_3D.cpp rename to operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_3D.cpp index 8d2fc53d..990646a8 100644 --- a/operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_macro_3D.cpp +++ b/operators/k_mass/noarch/P1ElementwiseKMass_toMatrix_P1ElementwiseKMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseKMass::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseKMass::toMatrix_P1ElementwiseKMass_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_k, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp b/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp rename to operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp index 83306a21..b33f4e31 100644 --- a/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_apply_macro_2D.cpp +++ b/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_apply_P2ElementwiseKMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMassAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseKMassAnnulusMap::apply_P2ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp index c1c5c8a0..5aab5da2 100644 --- a/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseKMassAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp b/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_toMatrix_P2ElementwiseKMassAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp rename to operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_toMatrix_P2ElementwiseKMassAnnulusMap_macro_2D.cpp index 88fd0df5..bf38bd10 100644 --- a/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp +++ b/operators/k_mass/noarch/P2ElementwiseKMassAnnulusMap_toMatrix_P2ElementwiseKMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMassAnnulusMap::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseKMassAnnulusMap::toMatrix_P2ElementwiseKMassAnnulusMap_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp b/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp rename to operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp index 30a275c0..71beb348 100644 --- a/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMassIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseKMassIcosahedralShellMap::apply_P2ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp index 37bea1c6..cc866e9f 100644 --- a/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseKMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp b/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp rename to operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp index 1e9383df..fe178fb2 100644 --- a/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp +++ b/operators/k_mass/noarch/P2ElementwiseKMassIcosahedralShellMap_toMatrix_P2ElementwiseKMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMassIcosahedralShellMap::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseKMassIcosahedralShellMap::toMatrix_P2ElementwiseKMassIcosahedralShellMap_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/noarch/P2ElementwiseKMass_apply_macro_2D.cpp b/operators/k_mass/noarch/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp similarity index 98% rename from operators/k_mass/noarch/P2ElementwiseKMass_apply_macro_2D.cpp rename to operators/k_mass/noarch/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp index 09140703..65bfd3c7 100644 --- a/operators/k_mass/noarch/P2ElementwiseKMass_apply_macro_2D.cpp +++ b/operators/k_mass/noarch/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMass::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseKMass::apply_P2ElementwiseKMass_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/noarch/P2ElementwiseKMass_apply_macro_3D.cpp b/operators/k_mass/noarch/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P2ElementwiseKMass_apply_macro_3D.cpp rename to operators/k_mass/noarch/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp index 6304e35f..0192f7b9 100644 --- a/operators/k_mass/noarch/P2ElementwiseKMass_apply_macro_3D.cpp +++ b/operators/k_mass/noarch/P2ElementwiseKMass_apply_P2ElementwiseKMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMass::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseKMass::apply_P2ElementwiseKMass_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp similarity index 98% rename from operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp index 1d2a08e7..efc0a36f 100644 --- a/operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMass::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseKMass::computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp index 61a30778..cfc48f0c 100644 --- a/operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/k_mass/noarch/P2ElementwiseKMass_computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMass::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseKMass::computeInverseDiagonalOperatorValues_P2ElementwiseKMass_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_macro_2D.cpp b/operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_2D.cpp similarity index 98% rename from operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_macro_2D.cpp rename to operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_2D.cpp index 6036f21b..2fcab826 100644 --- a/operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_macro_2D.cpp +++ b/operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMass::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseKMass::toMatrix_P2ElementwiseKMass_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_macro_3D.cpp b/operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_macro_3D.cpp rename to operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_3D.cpp index 6278372f..8e738245 100644 --- a/operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_macro_3D.cpp +++ b/operators/k_mass/noarch/P2ElementwiseKMass_toMatrix_P2ElementwiseKMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseKMass::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseKMass::toMatrix_P2ElementwiseKMass_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp b/operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp rename to operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp index d4947a80..195afdda 100644 --- a/operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_apply_macro_2D.cpp +++ b/operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseKMassAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ToP1ElementwiseKMassAnnulusMap::apply_P2ToP1ElementwiseKMassAnnulusMap_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp b/operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp rename to operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp index fd1d1423..071cafb2 100644 --- a/operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_macro_2D.cpp +++ b/operators/k_mass/noarch/P2ToP1ElementwiseKMassAnnulusMap_toMatrix_P2ToP1ElementwiseKMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseKMassAnnulusMap::toMatrix_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ToP1ElementwiseKMassAnnulusMap::toMatrix_P2ToP1ElementwiseKMassAnnulusMap_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp b/operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp rename to operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp index f16047bc..e84fa7b7 100644 --- a/operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseKMassIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ToP1ElementwiseKMassIcosahedralShellMap::apply_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp b/operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp rename to operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp index 669f4f0a..7ba05ac7 100644 --- a/operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_macro_3D.cpp +++ b/operators/k_mass/noarch/P2ToP1ElementwiseKMassIcosahedralShellMap_toMatrix_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseKMassIcosahedralShellMap::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ToP1ElementwiseKMassIcosahedralShellMap::toMatrix_P2ToP1ElementwiseKMassIcosahedralShellMap_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_macro_2D.cpp b/operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp similarity index 98% rename from operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_macro_2D.cpp rename to operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp index 0b07e333..24c2c68f 100644 --- a/operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_macro_2D.cpp +++ b/operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseKMass::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseKMass::apply_P2ToP1ElementwiseKMass_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_macro_3D.cpp b/operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_macro_3D.cpp rename to operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp index be10ce61..2e061eb2 100644 --- a/operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_macro_3D.cpp +++ b/operators/k_mass/noarch/P2ToP1ElementwiseKMass_apply_P2ToP1ElementwiseKMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseKMass::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseKMass::apply_P2ToP1ElementwiseKMass_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_macro_2D.cpp b/operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_2D.cpp similarity index 98% rename from operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_macro_2D.cpp rename to operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_2D.cpp index 37bd3800..b896d870 100644 --- a/operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_macro_2D.cpp +++ b/operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseKMass::toMatrix_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseKMass::toMatrix_P2ToP1ElementwiseKMass_macro_2D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_macro_3D.cpp b/operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_3D.cpp similarity index 99% rename from operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_macro_3D.cpp rename to operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_3D.cpp index 38e31c23..16fa83dc 100644 --- a/operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_macro_3D.cpp +++ b/operators/k_mass/noarch/P2ToP1ElementwiseKMass_toMatrix_P2ToP1ElementwiseKMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ToP1ElementwiseKMass::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ToP1ElementwiseKMass::toMatrix_P2ToP1ElementwiseKMass_macro_3D( idx_t * RESTRICT _data_dst, real_t * RESTRICT _data_kEdge, real_t * RESTRICT _data_kVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/mass/CMakeLists.txt b/operators/mass/CMakeLists.txt index 999c1ae5..9ed525d0 100644 --- a/operators/mass/CMakeLists.txt +++ b/operators/mass/CMakeLists.txt @@ -13,40 +13,40 @@ add_library( opgen-mass if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY) target_sources(opgen-mass PRIVATE - avx/P1ElementwiseMass_apply_macro_2D.cpp - avx/P1ElementwiseMass_apply_macro_3D.cpp - avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp - avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp - avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseMass_apply_macro_2D.cpp - avx/P2ElementwiseMass_apply_macro_3D.cpp - avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P1ElementwiseMass_toMatrix_macro_2D.cpp - noarch/P1ElementwiseMass_toMatrix_macro_3D.cpp - noarch/P2ElementwiseMassAnnulusMap_toMatrix_macro_2D.cpp - noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P2ElementwiseMass_toMatrix_macro_2D.cpp - noarch/P2ElementwiseMass_toMatrix_macro_3D.cpp + avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp + avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp + avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp + avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp + avx/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp + avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp + avx/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp + avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp + avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp + avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp + noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_2D.cpp + noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_3D.cpp + noarch/P2ElementwiseMassAnnulusMap_toMatrix_P2ElementwiseMassAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_2D.cpp + noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_3D.cpp ) set_source_files_properties( - avx/P1ElementwiseMass_apply_macro_2D.cpp - avx/P1ElementwiseMass_apply_macro_3D.cpp - avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp - avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp - avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseMass_apply_macro_2D.cpp - avx/P2ElementwiseMass_apply_macro_3D.cpp - avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp + avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp + avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp + avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp + avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp + avx/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp + avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp + avx/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp + avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp + avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp + avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS} ) @@ -57,24 +57,24 @@ else() target_sources(opgen-mass PRIVATE - noarch/P1ElementwiseMass_apply_macro_2D.cpp - noarch/P1ElementwiseMass_apply_macro_3D.cpp - noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P1ElementwiseMass_toMatrix_macro_2D.cpp - noarch/P1ElementwiseMass_toMatrix_macro_3D.cpp - noarch/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp - noarch/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseMassAnnulusMap_toMatrix_macro_2D.cpp - noarch/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp - noarch/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P2ElementwiseMass_apply_macro_2D.cpp - noarch/P2ElementwiseMass_apply_macro_3D.cpp - noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseMass_toMatrix_macro_2D.cpp - noarch/P2ElementwiseMass_toMatrix_macro_3D.cpp + noarch/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp + noarch/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp + noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp + noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp + noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_2D.cpp + noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_3D.cpp + noarch/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseMassAnnulusMap_toMatrix_P2ElementwiseMassAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp + noarch/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp + noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp + noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp + noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_2D.cpp + noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_3D.cpp ) endif() diff --git a/operators/mass/P1ElementwiseMass.cpp b/operators/mass/P1ElementwiseMass.cpp index c0cb9172..4d2d4680 100644 --- a/operators/mass/P1ElementwiseMass.cpp +++ b/operators/mass/P1ElementwiseMass.cpp @@ -124,7 +124,7 @@ void P1ElementwiseMass::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P1ElementwiseMass_macro_3D( _data_dst, _data_src, @@ -142,6 +142,7 @@ void P1ElementwiseMass::apply( const P1Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -189,7 +190,7 @@ void P1ElementwiseMass::apply( const P1Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P1ElementwiseMass_macro_2D( _data_dst, _data_src, @@ -201,6 +202,7 @@ void P1ElementwiseMass::apply( const P1Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -261,7 +263,7 @@ void P1ElementwiseMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& ma this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P1ElementwiseMass_macro_3D( _data_dst, _data_src, @@ -280,6 +282,7 @@ void P1ElementwiseMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& ma mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -308,7 +311,7 @@ void P1ElementwiseMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& ma this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P1ElementwiseMass_macro_2D( _data_dst, _data_src, @@ -321,6 +324,7 @@ void P1ElementwiseMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& ma mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -369,7 +373,7 @@ void P1ElementwiseMass::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D( _data_invDiag_, macro_vertex_coord_id_0comp0, @@ -386,6 +390,7 @@ void P1ElementwiseMass::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -424,7 +429,7 @@ void P1ElementwiseMass::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D( _data_invDiag_, macro_vertex_coord_id_0comp0, @@ -435,6 +440,7 @@ void P1ElementwiseMass::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/mass/P1ElementwiseMass.hpp b/operators/mass/P1ElementwiseMass.hpp index c9f4ea4f..cb592e21 100644 --- a/operators/mass/P1ElementwiseMass.hpp +++ b/operators/mass/P1ElementwiseMass.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -78,120 +80,149 @@ class P1ElementwiseMass : public Operator< P1Function< real_t >, P1Function< rea protected: private: - /// Kernel type: apply + /// Integral: P1ElementwiseMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 59 51 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P1ElementwiseMass_macro_2D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 149 128 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dst, - real_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P1ElementwiseMass_macro_3D( real_t* RESTRICT _data_dst, + real_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 50 42 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P1ElementwiseMass_macro_2D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 133 112 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dst, - idx_t* RESTRICT _data_src, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P1ElementwiseMass_macro_3D( idx_t* RESTRICT _data_dst, + idx_t* RESTRICT _data_src, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 2 | points: 3, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 44 33 12 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D( real_t* RESTRICT _data_invDiag_, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P1ElementwiseMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 1 | points: 4, degree: 2 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 113 88 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D( real_t* RESTRICT _data_invDiag_, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P1Function< real_t > > invDiag_; }; diff --git a/operators/mass/P2ElementwiseMass.cpp b/operators/mass/P2ElementwiseMass.cpp index b061b464..2cf057fe 100644 --- a/operators/mass/P2ElementwiseMass.cpp +++ b/operators/mass/P2ElementwiseMass.cpp @@ -127,7 +127,7 @@ void P2ElementwiseMass::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseMass_macro_3D( _data_dstEdge, _data_dstVertex, @@ -147,6 +147,7 @@ void P2ElementwiseMass::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -214,7 +215,7 @@ void P2ElementwiseMass::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseMass_macro_2D( _data_dstEdge, _data_dstVertex, @@ -228,6 +229,7 @@ void P2ElementwiseMass::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -294,7 +296,7 @@ void P2ElementwiseMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& ma this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseMass_macro_3D( _data_dstEdge, _data_dstVertex, @@ -315,6 +317,7 @@ void P2ElementwiseMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& ma mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -345,7 +348,7 @@ void P2ElementwiseMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& ma this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseMass_macro_2D( _data_dstEdge, _data_dstVertex, @@ -360,6 +363,7 @@ void P2ElementwiseMass::toMatrix( const std::shared_ptr< SparseMatrixProxy >& ma mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -410,7 +414,7 @@ void P2ElementwiseMass::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -428,6 +432,7 @@ void P2ElementwiseMass::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -470,7 +475,7 @@ void P2ElementwiseMass::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -482,6 +487,7 @@ void P2ElementwiseMass::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/mass/P2ElementwiseMass.hpp b/operators/mass/P2ElementwiseMass.hpp index 21eb0970..acfec559 100644 --- a/operators/mass/P2ElementwiseMass.hpp +++ b/operators/mass/P2ElementwiseMass.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -78,130 +80,159 @@ class P2ElementwiseMass : public Operator< P2Function< real_t >, P2Function< rea protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 260 336 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ElementwiseMass_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1238 1327 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseMass_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 224 300 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ElementwiseMass_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1138 1227 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseMass_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseMass + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 140 180 12 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseMass + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 433 479 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P2Function< real_t > > invDiag_; }; diff --git a/operators/mass/P2ElementwiseMassAnnulusMap.cpp b/operators/mass/P2ElementwiseMassAnnulusMap.cpp index 5ed2042e..c1240ffd 100644 --- a/operators/mass/P2ElementwiseMassAnnulusMap.cpp +++ b/operators/mass/P2ElementwiseMassAnnulusMap.cpp @@ -144,7 +144,7 @@ void P2ElementwiseMassAnnulusMap::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseMassAnnulusMap_macro_2D( _data_dstEdge, _data_dstVertex, @@ -166,6 +166,7 @@ void P2ElementwiseMassAnnulusMap::apply( const P2Function< real_t >& src, refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -245,7 +246,7 @@ void P2ElementwiseMassAnnulusMap::toMatrix( const std::shared_ptr< SparseMatrixP this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseMassAnnulusMap_macro_2D( _data_dstEdge, _data_dstVertex, @@ -268,6 +269,7 @@ void P2ElementwiseMassAnnulusMap::toMatrix( const std::shared_ptr< SparseMatrixP refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } @@ -332,7 +334,7 @@ void P2ElementwiseMassAnnulusMap::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -352,6 +354,7 @@ void P2ElementwiseMassAnnulusMap::computeInverseDiagonalOperatorValues() refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/mass/P2ElementwiseMassAnnulusMap.hpp b/operators/mass/P2ElementwiseMassAnnulusMap.hpp index 94b5634d..f0f2aafe 100644 --- a/operators/mass/P2ElementwiseMassAnnulusMap.hpp +++ b/operators/mass/P2ElementwiseMassAnnulusMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -79,83 +81,97 @@ class P2ElementwiseMassAnnulusMap : public Operator< P2Function< real_t >, P2Fun protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseMassAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 638 996 30 30 6 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseMassAnnulusMap_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseMassAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 602 960 30 30 6 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseMassAnnulusMap_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseMassAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 4 | points: 6, degree: 4 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 518 840 30 30 6 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; }; diff --git a/operators/mass/P2ElementwiseMassIcosahedralShellMap.cpp b/operators/mass/P2ElementwiseMassIcosahedralShellMap.cpp index a2a18b33..05302703 100644 --- a/operators/mass/P2ElementwiseMassIcosahedralShellMap.cpp +++ b/operators/mass/P2ElementwiseMassIcosahedralShellMap.cpp @@ -146,7 +146,7 @@ void P2ElementwiseMassIcosahedralShellMap::apply( const P2Function< real_t >& sr this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseMassIcosahedralShellMap_macro_3D( _data_dstEdge, _data_dstVertex, @@ -180,6 +180,7 @@ void P2ElementwiseMassIcosahedralShellMap::apply( const P2Function< real_t >& sr thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -271,7 +272,7 @@ void P2ElementwiseMassIcosahedralShellMap::toMatrix( const std::shared_ptr< Spar this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseMassIcosahedralShellMap_macro_3D( _data_dstEdge, _data_dstVertex, @@ -306,6 +307,7 @@ void P2ElementwiseMassIcosahedralShellMap::toMatrix( const std::shared_ptr< Spar thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } @@ -381,7 +383,7 @@ void P2ElementwiseMassIcosahedralShellMap::computeInverseDiagonalOperatorValues( this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -413,6 +415,7 @@ void P2ElementwiseMassIcosahedralShellMap::computeInverseDiagonalOperatorValues( thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/mass/P2ElementwiseMassIcosahedralShellMap.hpp b/operators/mass/P2ElementwiseMassIcosahedralShellMap.hpp index 5df746e5..67f5ee40 100644 --- a/operators/mass/P2ElementwiseMassIcosahedralShellMap.hpp +++ b/operators/mass/P2ElementwiseMassIcosahedralShellMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -79,119 +81,134 @@ class P2ElementwiseMassIcosahedralShellMap : public Operator< P2Function< real_t protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseMassIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2811 4363 69 33 11 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseMassIcosahedralShellMap_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseMassIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2711 4263 69 33 11 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseMassIcosahedralShellMap_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseMassIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Jaśkowiec-Sukumar 04 | points: 11, degree: 4 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2006 3515 69 33 11 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D( + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; }; diff --git a/operators/mass/avx/P1ElementwiseMass_apply_macro_2D.cpp b/operators/mass/avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp similarity index 99% rename from operators/mass/avx/P1ElementwiseMass_apply_macro_2D.cpp rename to operators/mass/avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp index 9c0c9004..ec947a38 100644 --- a/operators/mass/avx/P1ElementwiseMass_apply_macro_2D.cpp +++ b/operators/mass/avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseMass::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseMass::apply_P1ElementwiseMass_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/mass/avx/P1ElementwiseMass_apply_macro_3D.cpp b/operators/mass/avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp similarity index 99% rename from operators/mass/avx/P1ElementwiseMass_apply_macro_3D.cpp rename to operators/mass/avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp index 509bbba7..0087945f 100644 --- a/operators/mass/avx/P1ElementwiseMass_apply_macro_3D.cpp +++ b/operators/mass/avx/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseMass::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseMass::apply_P1ElementwiseMass_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp similarity index 98% rename from operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp index d3c90d05..76afd8ad 100644 --- a/operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseMass::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseMass::computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp similarity index 99% rename from operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp index 23b54bc3..c0c94466 100644 --- a/operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/mass/avx/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseMass::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseMass::computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/mass/avx/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp b/operators/mass/avx/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/mass/avx/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp rename to operators/mass/avx/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp index 1894c2e7..356847c2 100644 --- a/operators/mass/avx/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp +++ b/operators/mass/avx/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMassAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseMassAnnulusMap::apply_P2ElementwiseMassAnnulusMap_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/mass/avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/mass/avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/mass/avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/mass/avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp index 472f5568..ae52b270 100644 --- a/operators/mass/avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/mass/avx/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMassAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseMassAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp b/operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp rename to operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp index 618488dd..a2dc6f49 100644 --- a/operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMassIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseMassIcosahedralShellMap::apply_P2ElementwiseMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp index 543d0fc8..bd9c7f80 100644 --- a/operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/mass/avx/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/mass/avx/P2ElementwiseMass_apply_macro_2D.cpp b/operators/mass/avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp similarity index 99% rename from operators/mass/avx/P2ElementwiseMass_apply_macro_2D.cpp rename to operators/mass/avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp index 6ae1b62a..cf949b84 100644 --- a/operators/mass/avx/P2ElementwiseMass_apply_macro_2D.cpp +++ b/operators/mass/avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMass::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseMass::apply_P2ElementwiseMass_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/mass/avx/P2ElementwiseMass_apply_macro_3D.cpp b/operators/mass/avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp similarity index 99% rename from operators/mass/avx/P2ElementwiseMass_apply_macro_3D.cpp rename to operators/mass/avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp index 12520616..45369cfd 100644 --- a/operators/mass/avx/P2ElementwiseMass_apply_macro_3D.cpp +++ b/operators/mass/avx/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMass::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseMass::apply_P2ElementwiseMass_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp similarity index 99% rename from operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp index e99cef7b..63255480 100644 --- a/operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMass::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseMass::computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp similarity index 99% rename from operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp index f27a0a3d..4cfb1301 100644 --- a/operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/mass/avx/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMass::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseMass::computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/mass/noarch/P1ElementwiseMass_apply_macro_2D.cpp b/operators/mass/noarch/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp similarity index 97% rename from operators/mass/noarch/P1ElementwiseMass_apply_macro_2D.cpp rename to operators/mass/noarch/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp index 50d93e68..285c5a3e 100644 --- a/operators/mass/noarch/P1ElementwiseMass_apply_macro_2D.cpp +++ b/operators/mass/noarch/P1ElementwiseMass_apply_P1ElementwiseMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseMass::apply_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseMass::apply_P1ElementwiseMass_macro_2D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/mass/noarch/P1ElementwiseMass_apply_macro_3D.cpp b/operators/mass/noarch/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp similarity index 99% rename from operators/mass/noarch/P1ElementwiseMass_apply_macro_3D.cpp rename to operators/mass/noarch/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp index 6be52e21..aecb8632 100644 --- a/operators/mass/noarch/P1ElementwiseMass_apply_macro_3D.cpp +++ b/operators/mass/noarch/P1ElementwiseMass_apply_P1ElementwiseMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseMass::apply_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseMass::apply_P1ElementwiseMass_macro_3D( real_t * RESTRICT _data_dst, real_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp similarity index 97% rename from operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp index 39fda781..739e5dcc 100644 --- a/operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseMass::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseMass::computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_2D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp similarity index 99% rename from operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp index 48fe2941..7a311bcd 100644 --- a/operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/mass/noarch/P1ElementwiseMass_computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseMass::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseMass::computeInverseDiagonalOperatorValues_P1ElementwiseMass_macro_3D( real_t * RESTRICT _data_invDiag_, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/mass/noarch/P1ElementwiseMass_toMatrix_macro_2D.cpp b/operators/mass/noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_2D.cpp similarity index 97% rename from operators/mass/noarch/P1ElementwiseMass_toMatrix_macro_2D.cpp rename to operators/mass/noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_2D.cpp index 67fe128f..2997ba3a 100644 --- a/operators/mass/noarch/P1ElementwiseMass_toMatrix_macro_2D.cpp +++ b/operators/mass/noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseMass::toMatrix_macro_2D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseMass::toMatrix_P1ElementwiseMass_macro_2D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.16666666666666666, 0.16666666666666666, 0.16666666666666666}; diff --git a/operators/mass/noarch/P1ElementwiseMass_toMatrix_macro_3D.cpp b/operators/mass/noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_3D.cpp similarity index 99% rename from operators/mass/noarch/P1ElementwiseMass_toMatrix_macro_3D.cpp rename to operators/mass/noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_3D.cpp index aab78340..c264411c 100644 --- a/operators/mass/noarch/P1ElementwiseMass_toMatrix_macro_3D.cpp +++ b/operators/mass/noarch/P1ElementwiseMass_toMatrix_P1ElementwiseMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P1ElementwiseMass::toMatrix_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P1ElementwiseMass::toMatrix_P1ElementwiseMass_macro_3D( idx_t * RESTRICT _data_dst, idx_t * RESTRICT _data_src, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.041666666666666657, 0.041666666666666657, 0.041666666666666657, 0.041666666666666657}; diff --git a/operators/mass/noarch/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp b/operators/mass/noarch/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/mass/noarch/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp rename to operators/mass/noarch/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp index f9590fda..81da3f28 100644 --- a/operators/mass/noarch/P2ElementwiseMassAnnulusMap_apply_macro_2D.cpp +++ b/operators/mass/noarch/P2ElementwiseMassAnnulusMap_apply_P2ElementwiseMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMassAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseMassAnnulusMap::apply_P2ElementwiseMassAnnulusMap_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/mass/noarch/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/mass/noarch/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/mass/noarch/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/mass/noarch/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp index 66950b7c..e48f504b 100644 --- a/operators/mass/noarch/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/mass/noarch/P2ElementwiseMassAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMassAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseMassAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseMassAnnulusMap_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/mass/noarch/P2ElementwiseMassAnnulusMap_toMatrix_macro_2D.cpp b/operators/mass/noarch/P2ElementwiseMassAnnulusMap_toMatrix_P2ElementwiseMassAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/mass/noarch/P2ElementwiseMassAnnulusMap_toMatrix_macro_2D.cpp rename to operators/mass/noarch/P2ElementwiseMassAnnulusMap_toMatrix_P2ElementwiseMassAnnulusMap_macro_2D.cpp index c6b091cc..a44577fc 100644 --- a/operators/mass/noarch/P2ElementwiseMassAnnulusMap_toMatrix_macro_2D.cpp +++ b/operators/mass/noarch/P2ElementwiseMassAnnulusMap_toMatrix_P2ElementwiseMassAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMassAnnulusMap::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseMassAnnulusMap::toMatrix_P2ElementwiseMassAnnulusMap_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp b/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp rename to operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp index 27a6f56a..9d68eacf 100644 --- a/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_apply_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMassIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseMassIcosahedralShellMap::apply_P2ElementwiseMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp index 1a772c20..c72291d3 100644 --- a/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseMassIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseMassIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_macro_3D.cpp b/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_macro_3D.cpp rename to operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp index 22d4b441..c8476258 100644 --- a/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_macro_3D.cpp +++ b/operators/mass/noarch/P2ElementwiseMassIcosahedralShellMap_toMatrix_P2ElementwiseMassIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMassIcosahedralShellMap::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseMassIcosahedralShellMap::toMatrix_P2ElementwiseMassIcosahedralShellMap_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/mass/noarch/P2ElementwiseMass_apply_macro_2D.cpp b/operators/mass/noarch/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp similarity index 98% rename from operators/mass/noarch/P2ElementwiseMass_apply_macro_2D.cpp rename to operators/mass/noarch/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp index 2e75a97d..ec2ae4c0 100644 --- a/operators/mass/noarch/P2ElementwiseMass_apply_macro_2D.cpp +++ b/operators/mass/noarch/P2ElementwiseMass_apply_P2ElementwiseMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMass::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseMass::apply_P2ElementwiseMass_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/mass/noarch/P2ElementwiseMass_apply_macro_3D.cpp b/operators/mass/noarch/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp similarity index 99% rename from operators/mass/noarch/P2ElementwiseMass_apply_macro_3D.cpp rename to operators/mass/noarch/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp index 4ec9c161..6cf210a4 100644 --- a/operators/mass/noarch/P2ElementwiseMass_apply_macro_3D.cpp +++ b/operators/mass/noarch/P2ElementwiseMass_apply_P2ElementwiseMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMass::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseMass::apply_P2ElementwiseMass_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp similarity index 97% rename from operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp index 83462877..cf9a8dd1 100644 --- a/operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMass::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseMass::computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp similarity index 99% rename from operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp index 2c456b57..49ad0528 100644 --- a/operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/mass/noarch/P2ElementwiseMass_computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMass::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseMass::computeInverseDiagonalOperatorValues_P2ElementwiseMass_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/mass/noarch/P2ElementwiseMass_toMatrix_macro_2D.cpp b/operators/mass/noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_2D.cpp similarity index 98% rename from operators/mass/noarch/P2ElementwiseMass_toMatrix_macro_2D.cpp rename to operators/mass/noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_2D.cpp index 134890c5..e0e35dbc 100644 --- a/operators/mass/noarch/P2ElementwiseMass_toMatrix_macro_2D.cpp +++ b/operators/mass/noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMass::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseMass::toMatrix_P2ElementwiseMass_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949, 0.11169079483900581, 0.054975871827660949}; diff --git a/operators/mass/noarch/P2ElementwiseMass_toMatrix_macro_3D.cpp b/operators/mass/noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_3D.cpp similarity index 99% rename from operators/mass/noarch/P2ElementwiseMass_toMatrix_macro_3D.cpp rename to operators/mass/noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_3D.cpp index 4bdda5f7..bf026d00 100644 --- a/operators/mass/noarch/P2ElementwiseMass_toMatrix_macro_3D.cpp +++ b/operators/mass/noarch/P2ElementwiseMass_toMatrix_P2ElementwiseMass_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseMass::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseMass::toMatrix_P2ElementwiseMass_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {0.02364442875776411, 0.0041220137385117907, 0.025486503738822306, 0.0091363578885896128, 0.0052827872295400091, 0.02526054964096567, 0.01949470769240411, 0.0094909452962763929, 0.019522077110586403, 0.018549261128954084, 0.0066770344442521412}; diff --git a/operators/shear_heating/CMakeLists.txt b/operators/shear_heating/CMakeLists.txt index 8c5a4622..c60a8fee 100644 --- a/operators/shear_heating/CMakeLists.txt +++ b/operators/shear_heating/CMakeLists.txt @@ -11,30 +11,30 @@ add_library( opgen-shear_heating if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY) target_sources(opgen-shear_heating PRIVATE - avx/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp - avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp - avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseShearHeating_apply_macro_2D.cpp - avx/P2ElementwiseShearHeating_apply_macro_3D.cpp - avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_macro_2D.cpp - noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P2ElementwiseShearHeating_toMatrix_macro_2D.cpp - noarch/P2ElementwiseShearHeating_toMatrix_macro_3D.cpp + avx/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp + avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp + avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp + avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp + avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp + avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp + noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_2D.cpp + noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_3D.cpp ) set_source_files_properties( - avx/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp - avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp - avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - avx/P2ElementwiseShearHeating_apply_macro_2D.cpp - avx/P2ElementwiseShearHeating_apply_macro_3D.cpp - avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp - avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp + avx/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp + avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp + avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp + avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp + avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp + avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp + avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS} ) @@ -45,18 +45,18 @@ else() target_sources(opgen-shear_heating PRIVATE - noarch/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp - noarch/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_macro_2D.cpp - noarch/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp - noarch/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_macro_3D.cpp - noarch/P2ElementwiseShearHeating_apply_macro_2D.cpp - noarch/P2ElementwiseShearHeating_apply_macro_3D.cpp - noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp - noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp - noarch/P2ElementwiseShearHeating_toMatrix_macro_2D.cpp - noarch/P2ElementwiseShearHeating_toMatrix_macro_3D.cpp + noarch/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp + noarch/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp + noarch/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp + noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp + noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp + noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_2D.cpp + noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_3D.cpp ) endif() diff --git a/operators/shear_heating/P2ElementwiseShearHeating.cpp b/operators/shear_heating/P2ElementwiseShearHeating.cpp index c862b806..bb205872 100644 --- a/operators/shear_heating/P2ElementwiseShearHeating.cpp +++ b/operators/shear_heating/P2ElementwiseShearHeating.cpp @@ -161,7 +161,7 @@ void P2ElementwiseShearHeating::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseShearHeating_macro_3D( _data_dstEdge, _data_dstVertex, @@ -189,6 +189,7 @@ void P2ElementwiseShearHeating::apply( const P2Function< real_t >& src, macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -262,7 +263,7 @@ void P2ElementwiseShearHeating::apply( const P2Function< real_t >& src, this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseShearHeating_macro_2D( _data_dstEdge, _data_dstVertex, @@ -282,6 +283,7 @@ void P2ElementwiseShearHeating::apply( const P2Function< real_t >& src, macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -367,7 +369,7 @@ void P2ElementwiseShearHeating::toMatrix( const std::shared_ptr< SparseMatrixPro this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseShearHeating_macro_3D( _data_dstEdge, _data_dstVertex, @@ -396,6 +398,7 @@ void P2ElementwiseShearHeating::toMatrix( const std::shared_ptr< SparseMatrixPro mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -437,7 +440,7 @@ void P2ElementwiseShearHeating::toMatrix( const std::shared_ptr< SparseMatrixPro this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseShearHeating_macro_2D( _data_dstEdge, _data_dstVertex, @@ -458,6 +461,7 @@ void P2ElementwiseShearHeating::toMatrix( const std::shared_ptr< SparseMatrixPro mat, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } } @@ -527,7 +531,7 @@ void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -553,6 +557,7 @@ void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_3comp2, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } @@ -606,7 +611,7 @@ void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -624,6 +629,7 @@ void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues() macro_vertex_coord_id_2comp1, micro_edges_per_macro_edge, micro_edges_per_macro_edge_float ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/shear_heating/P2ElementwiseShearHeating.hpp b/operators/shear_heating/P2ElementwiseShearHeating.hpp index 1fd9f687..cda1dd4c 100644 --- a/operators/shear_heating/P2ElementwiseShearHeating.hpp +++ b/operators/shear_heating/P2ElementwiseShearHeating.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/operators/Operator.hpp" @@ -36,6 +37,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -107,172 +109,201 @@ class P2ElementwiseShearHeating : public Operator< P2Function< real_t >, P2Funct protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseShearHeating + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 352 400 12 0 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t* RESTRICT _data_uxEdge, - real_t* RESTRICT _data_uxVertex, - real_t* RESTRICT _data_uyEdge, - real_t* RESTRICT _data_uyVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: apply + void apply_P2ElementwiseShearHeating_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseShearHeating + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1268 1327 36 0 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t* RESTRICT _data_uxEdge, - real_t* RESTRICT _data_uxVertex, - real_t* RESTRICT _data_uyEdge, - real_t* RESTRICT _data_uyVertex, - real_t* RESTRICT _data_uzEdge, - real_t* RESTRICT _data_uzVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseShearHeating_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t* RESTRICT _data_uzEdge, + real_t* RESTRICT _data_uzVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseShearHeating + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 316 364 12 0 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t* RESTRICT _data_uxEdge, - real_t* RESTRICT _data_uxVertex, - real_t* RESTRICT _data_uyEdge, - real_t* RESTRICT _data_uyVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: toMatrix + void toMatrix_P2ElementwiseShearHeating_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseShearHeating + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1168 1227 36 0 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t* RESTRICT _data_uxEdge, - real_t* RESTRICT _data_uxVertex, - real_t* RESTRICT _data_uyEdge, - real_t* RESTRICT _data_uyVertex, - real_t* RESTRICT _data_uzEdge, - real_t* RESTRICT _data_uzVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseShearHeating_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t* RESTRICT _data_uzEdge, + real_t* RESTRICT _data_uzVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseShearHeating + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 262 284 12 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_uxEdge, - real_t* RESTRICT _data_uxVertex, - real_t* RESTRICT _data_uyEdge, - real_t* RESTRICT _data_uyVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseShearHeating + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IdentityMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 953 962 36 0 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_uxEdge, - real_t* RESTRICT _data_uxVertex, - real_t* RESTRICT _data_uyEdge, - real_t* RESTRICT _data_uyVertex, - real_t* RESTRICT _data_uzEdge, - real_t* RESTRICT _data_uzVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t* RESTRICT _data_uzEdge, + real_t* RESTRICT _data_uzVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/shear_heating/P2ElementwiseShearHeatingAnnulusMap.cpp b/operators/shear_heating/P2ElementwiseShearHeatingAnnulusMap.cpp index 04eeb81d..32da46ef 100644 --- a/operators/shear_heating/P2ElementwiseShearHeatingAnnulusMap.cpp +++ b/operators/shear_heating/P2ElementwiseShearHeatingAnnulusMap.cpp @@ -159,7 +159,7 @@ void P2ElementwiseShearHeatingAnnulusMap::apply( const P2Function< real_t >& src this->timingTree_->start( "kernel" ); - apply_macro_2D( + apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D( _data_dstEdge, _data_dstVertex, @@ -187,6 +187,7 @@ void P2ElementwiseShearHeatingAnnulusMap::apply( const P2Function< real_t >& src refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } @@ -282,7 +283,7 @@ void P2ElementwiseShearHeatingAnnulusMap::toMatrix( const std::shared_ptr< Spars this->timingTree_->start( "kernel" ); - toMatrix_macro_2D( + toMatrix_P2ElementwiseShearHeatingAnnulusMap_macro_2D( _data_dstEdge, _data_dstVertex, @@ -311,6 +312,7 @@ void P2ElementwiseShearHeatingAnnulusMap::toMatrix( const std::shared_ptr< Spars refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } } @@ -391,7 +393,7 @@ void P2ElementwiseShearHeatingAnnulusMap::computeInverseDiagonalOperatorValues() this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_2D( + computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -417,6 +419,7 @@ void P2ElementwiseShearHeatingAnnulusMap::computeInverseDiagonalOperatorValues() refVertex_1, thrVertex_0, thrVertex_1 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/shear_heating/P2ElementwiseShearHeatingAnnulusMap.hpp b/operators/shear_heating/P2ElementwiseShearHeatingAnnulusMap.hpp index 617e2c69..690d3bba 100644 --- a/operators/shear_heating/P2ElementwiseShearHeatingAnnulusMap.hpp +++ b/operators/shear_heating/P2ElementwiseShearHeatingAnnulusMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/AnnulusMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -107,101 +109,116 @@ class P2ElementwiseShearHeatingAnnulusMap : public Operator< P2Function< real_t protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseShearHeatingAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 556 756 20 12 0 0 0 1 - void apply_macro_2D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t* RESTRICT _data_uxEdge, - real_t* RESTRICT _data_uxVertex, - real_t* RESTRICT _data_uyEdge, - real_t* RESTRICT _data_uyVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseShearHeatingAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 520 720 20 12 0 0 0 4 - void toMatrix_macro_2D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t* RESTRICT _data_uxEdge, - real_t* RESTRICT _data_uxVertex, - real_t* RESTRICT _data_uyEdge, - real_t* RESTRICT _data_uyVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseShearHeatingAnnulusMap_macro_2D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseShearHeatingAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 466 640 20 12 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_2D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_uxEdge, - real_t* RESTRICT _data_uxVertex, - real_t* RESTRICT _data_uyEdge, - real_t* RESTRICT _data_uyVertex, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t refVertex_0, - real_t refVertex_1, - real_t thrVertex_0, - real_t thrVertex_1 ) const; + void + computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D( real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/shear_heating/P2ElementwiseShearHeatingIcosahedralShellMap.cpp b/operators/shear_heating/P2ElementwiseShearHeatingIcosahedralShellMap.cpp index 95c61d85..12969d32 100644 --- a/operators/shear_heating/P2ElementwiseShearHeatingIcosahedralShellMap.cpp +++ b/operators/shear_heating/P2ElementwiseShearHeatingIcosahedralShellMap.cpp @@ -175,7 +175,7 @@ void P2ElementwiseShearHeatingIcosahedralShellMap::apply( const P2Function< real this->timingTree_->start( "kernel" ); - apply_macro_3D( + apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( _data_dstEdge, _data_dstVertex, @@ -217,6 +217,7 @@ void P2ElementwiseShearHeatingIcosahedralShellMap::apply( const P2Function< real thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } @@ -327,7 +328,7 @@ void P2ElementwiseShearHeatingIcosahedralShellMap::toMatrix( const std::shared_p this->timingTree_->start( "kernel" ); - toMatrix_macro_3D( + toMatrix_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( _data_dstEdge, _data_dstVertex, @@ -370,6 +371,7 @@ void P2ElementwiseShearHeatingIcosahedralShellMap::toMatrix( const std::shared_p thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } } @@ -467,7 +469,7 @@ void P2ElementwiseShearHeatingIcosahedralShellMap::computeInverseDiagonalOperato this->timingTree_->start( "kernel" ); - computeInverseDiagonalOperatorValues_macro_3D( + computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( _data_invDiag_Edge, _data_invDiag_Vertex, @@ -507,6 +509,7 @@ void P2ElementwiseShearHeatingIcosahedralShellMap::computeInverseDiagonalOperato thrVertex_0, thrVertex_1, thrVertex_2 ); + this->timingTree_->stop( "kernel" ); } diff --git a/operators/shear_heating/P2ElementwiseShearHeatingIcosahedralShellMap.hpp b/operators/shear_heating/P2ElementwiseShearHeatingIcosahedralShellMap.hpp index 4dbc9213..a4f62ade 100644 --- a/operators/shear_heating/P2ElementwiseShearHeatingIcosahedralShellMap.hpp +++ b/operators/shear_heating/P2ElementwiseShearHeatingIcosahedralShellMap.hpp @@ -29,6 +29,7 @@ #include "core/DataTypes.h" #include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" #include "hyteg/communication/Syncing.hpp" #include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" #include "hyteg/geometry/IcosahedralShellMap.hpp" @@ -37,6 +38,7 @@ #include "hyteg/primitivestorage/PrimitiveStorage.hpp" #include "hyteg/solvers/Smoothables.hpp" #include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" #define FUNC_PREFIX @@ -108,143 +110,158 @@ class P2ElementwiseShearHeatingIcosahedralShellMap : public Operator< P2Function protected: private: - /// Kernel type: apply + /// Integral: P2ElementwiseShearHeatingIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 2053 2707 46 10 0 0 0 1 - void apply_macro_3D( real_t* RESTRICT _data_dstEdge, - real_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_srcEdge, - real_t* RESTRICT _data_srcVertex, - real_t* RESTRICT _data_uxEdge, - real_t* RESTRICT _data_uxVertex, - real_t* RESTRICT _data_uyEdge, - real_t* RESTRICT _data_uyVertex, - real_t* RESTRICT _data_uzEdge, - real_t* RESTRICT _data_uzVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: toMatrix + void apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t* RESTRICT _data_uzEdge, + real_t* RESTRICT _data_uzVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseShearHeatingIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1953 2607 46 10 0 0 0 4 - void toMatrix_macro_3D( idx_t* RESTRICT _data_dstEdge, - idx_t* RESTRICT _data_dstVertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - idx_t* RESTRICT _data_srcEdge, - idx_t* RESTRICT _data_srcVertex, - real_t* RESTRICT _data_uxEdge, - real_t* RESTRICT _data_uxVertex, - real_t* RESTRICT _data_uyEdge, - real_t* RESTRICT _data_uyVertex, - real_t* RESTRICT _data_uzEdge, - real_t* RESTRICT _data_uzVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - std::shared_ptr< SparseMatrixProxy > mat, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; - /// Kernel type: computeInverseDiagonalOperatorValues + void toMatrix_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t* RESTRICT _data_uzEdge, + real_t* RESTRICT _data_uzVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; + + /// Integral: P2ElementwiseShearHeatingIcosahedralShellMap + /// - volume element: tetrahedron, dim: 3, vertices: 4, spacedim: 3 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH /// - quadrature rule: Hammer-Marlowe-Stroud 3 | points: 5, degree: 3 + /// - blending map: IcosahedralShellMap /// - operations per element: /// adds muls divs pows abs assignments function_calls unknown_ops /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- /// 1738 2342 46 10 0 0 0 1 - void computeInverseDiagonalOperatorValues_macro_3D( real_t* RESTRICT _data_invDiag_Edge, - real_t* RESTRICT _data_invDiag_Vertex, - real_t* RESTRICT _data_muEdge, - real_t* RESTRICT _data_muVertex, - real_t* RESTRICT _data_uxEdge, - real_t* RESTRICT _data_uxVertex, - real_t* RESTRICT _data_uyEdge, - real_t* RESTRICT _data_uyVertex, - real_t* RESTRICT _data_uzEdge, - real_t* RESTRICT _data_uzVertex, - real_t forVertex_0, - real_t forVertex_1, - real_t forVertex_2, - real_t macro_vertex_coord_id_0comp0, - real_t macro_vertex_coord_id_0comp1, - real_t macro_vertex_coord_id_0comp2, - real_t macro_vertex_coord_id_1comp0, - real_t macro_vertex_coord_id_1comp1, - real_t macro_vertex_coord_id_1comp2, - real_t macro_vertex_coord_id_2comp0, - real_t macro_vertex_coord_id_2comp1, - real_t macro_vertex_coord_id_2comp2, - real_t macro_vertex_coord_id_3comp0, - real_t macro_vertex_coord_id_3comp1, - real_t macro_vertex_coord_id_3comp2, - int64_t micro_edges_per_macro_edge, - real_t micro_edges_per_macro_edge_float, - real_t radRayVertex, - real_t radRefVertex, - real_t rayVertex_0, - real_t rayVertex_1, - real_t rayVertex_2, - real_t refVertex_0, - real_t refVertex_1, - real_t refVertex_2, - real_t thrVertex_0, - real_t thrVertex_1, - real_t thrVertex_2 ) const; + void computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_muEdge, + real_t* RESTRICT _data_muVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t* RESTRICT _data_uzEdge, + real_t* RESTRICT _data_uzVertex, + real_t forVertex_0, + real_t forVertex_1, + real_t forVertex_2, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_0comp2, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_1comp2, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + real_t macro_vertex_coord_id_2comp2, + real_t macro_vertex_coord_id_3comp0, + real_t macro_vertex_coord_id_3comp1, + real_t macro_vertex_coord_id_3comp2, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t rayVertex_2, + real_t refVertex_0, + real_t refVertex_1, + real_t refVertex_2, + real_t thrVertex_0, + real_t thrVertex_1, + real_t thrVertex_2 ) const; std::shared_ptr< P2Function< real_t > > invDiag_; P2Function< real_t > mu; diff --git a/operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp b/operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp rename to operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp index e8bae39b..c30cf5f4 100644 --- a/operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp +++ b/operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeatingAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseShearHeatingAnnulusMap::apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp similarity index 99% rename from operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp index 681c4336..ed6c10a0 100644 --- a/operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/shear_heating/avx/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeatingAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseShearHeatingAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp b/operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp rename to operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp index e26684a9..8316560d 100644 --- a/operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeatingIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseShearHeatingIcosahedralShellMap::apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp index c41091c7..588a2315 100644 --- a/operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/shear_heating/avx/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeatingIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseShearHeatingIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/shear_heating/avx/P2ElementwiseShearHeating_apply_macro_2D.cpp b/operators/shear_heating/avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp similarity index 99% rename from operators/shear_heating/avx/P2ElementwiseShearHeating_apply_macro_2D.cpp rename to operators/shear_heating/avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp index 45e6f163..5fdd5726 100644 --- a/operators/shear_heating/avx/P2ElementwiseShearHeating_apply_macro_2D.cpp +++ b/operators/shear_heating/avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeating::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseShearHeating::apply_P2ElementwiseShearHeating_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/shear_heating/avx/P2ElementwiseShearHeating_apply_macro_3D.cpp b/operators/shear_heating/avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp similarity index 99% rename from operators/shear_heating/avx/P2ElementwiseShearHeating_apply_macro_3D.cpp rename to operators/shear_heating/avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp index 454f70e4..87e3c112 100644 --- a/operators/shear_heating/avx/P2ElementwiseShearHeating_apply_macro_3D.cpp +++ b/operators/shear_heating/avx/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeating::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseShearHeating::apply_P2ElementwiseShearHeating_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp similarity index 99% rename from operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp index 5751d999..579af214 100644 --- a/operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp similarity index 99% rename from operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp index 319df79c..53ac3d65 100644 --- a/operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/shear_heating/avx/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp rename to operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp index 66b9a9e6..77ffc3eb 100644 --- a/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_apply_macro_2D.cpp +++ b/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeatingAnnulusMap::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseShearHeatingAnnulusMap::apply_P2ElementwiseShearHeatingAnnulusMap_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp index 9a0ec572..51d93ee1 100644 --- a/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeatingAnnulusMap::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseShearHeatingAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingAnnulusMap_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_macro_2D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp similarity index 98% rename from operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_macro_2D.cpp rename to operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp index e0b1f2a3..ae71a619 100644 --- a/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_macro_2D.cpp +++ b/operators/shear_heating/noarch/P2ElementwiseShearHeatingAnnulusMap_toMatrix_P2ElementwiseShearHeatingAnnulusMap_macro_2D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeatingAnnulusMap::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +void P2ElementwiseShearHeatingAnnulusMap::toMatrix_P2ElementwiseShearHeatingAnnulusMap_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp rename to operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp index 3d2aa7f6..616b61f2 100644 --- a/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_apply_macro_3D.cpp +++ b/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeatingIcosahedralShellMap::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseShearHeatingIcosahedralShellMap::apply_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp index 2ba79ecd..8fec87bf 100644 --- a/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeatingIcosahedralShellMap::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseShearHeatingIcosahedralShellMap::computeInverseDiagonalOperatorValues_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_macro_3D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp similarity index 99% rename from operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_macro_3D.cpp rename to operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp index 3948bfe6..53d7d217 100644 --- a/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_macro_3D.cpp +++ b/operators/shear_heating/noarch/P2ElementwiseShearHeatingIcosahedralShellMap_toMatrix_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D.cpp @@ -45,6 +45,10 @@ + + + + @@ -58,7 +62,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeatingIcosahedralShellMap::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const +void P2ElementwiseShearHeatingIcosahedralShellMap::toMatrix_P2ElementwiseShearHeatingIcosahedralShellMap_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t forVertex_0, real_t forVertex_1, real_t forVertex_2, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t rayVertex_2, real_t refVertex_0, real_t refVertex_1, real_t refVertex_2, real_t thrVertex_0, real_t thrVertex_1, real_t thrVertex_2 ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_macro_2D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp similarity index 98% rename from operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_macro_2D.cpp rename to operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp index 42127512..8e145f3f 100644 --- a/operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_macro_2D.cpp +++ b/operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeating::apply_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseShearHeating::apply_P2ElementwiseShearHeating_macro_2D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_macro_3D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp similarity index 99% rename from operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_macro_3D.cpp rename to operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp index bfa2f45d..b7fd6027 100644 --- a/operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_macro_3D.cpp +++ b/operators/shear_heating/noarch/P2ElementwiseShearHeating_apply_P2ElementwiseShearHeating_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeating::apply_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseShearHeating::apply_P2ElementwiseShearHeating_macro_3D( real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp similarity index 97% rename from operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp rename to operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp index 6f8bbe9d..ba3e6b09 100644 --- a/operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_2D.cpp +++ b/operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_2D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp similarity index 99% rename from operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp rename to operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp index f10f8d9b..081669dd 100644 --- a/operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_macro_3D.cpp +++ b/operators/shear_heating/noarch/P2ElementwiseShearHeating_computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseShearHeating::computeInverseDiagonalOperatorValues_P2ElementwiseShearHeating_macro_3D( real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_macro_2D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_2D.cpp similarity index 98% rename from operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_macro_2D.cpp rename to operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_2D.cpp index 98085203..86974681 100644 --- a/operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_macro_2D.cpp +++ b/operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_2D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeating::toMatrix_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseShearHeating::toMatrix_P2ElementwiseShearHeating_macro_2D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; diff --git a/operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_macro_3D.cpp b/operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_3D.cpp similarity index 99% rename from operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_macro_3D.cpp rename to operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_3D.cpp index e6b13d44..b896d935 100644 --- a/operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_macro_3D.cpp +++ b/operators/shear_heating/noarch/P2ElementwiseShearHeating_toMatrix_P2ElementwiseShearHeating_macro_3D.cpp @@ -43,6 +43,10 @@ + + + + @@ -56,7 +60,7 @@ namespace hyteg { namespace operatorgeneration { -void P2ElementwiseShearHeating::toMatrix_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +void P2ElementwiseShearHeating::toMatrix_P2ElementwiseShearHeating_macro_3D( idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_muEdge, real_t * RESTRICT _data_muVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t * RESTRICT _data_uzEdge, real_t * RESTRICT _data_uzVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_0comp2, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_1comp2, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, real_t macro_vertex_coord_id_2comp2, real_t macro_vertex_coord_id_3comp0, real_t macro_vertex_coord_id_3comp1, real_t macro_vertex_coord_id_3comp2, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const { { const real_t _data_q_w [] = {-0.1333333333333333, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983, 0.074999999999999983}; -- GitLab