diff --git a/generate/requirements.txt b/generate/requirements.txt index 70b3c18a1ea140e1efee581fceb07b41b29e8e78..953304809f461249dd0ba8ed14eb644029e799eb 100644 --- a/generate/requirements.txt +++ b/generate/requirements.txt @@ -1,5 +1,5 @@ --extra-index-url https://test.pypi.org/simple/ -hog @ git+ssh://git@i10git.cs.fau.de/hyteg/hog@516f33ba88809c2174d316883f09221ed0e7ce02 +hog @ git+ssh://git@i10git.cs.fau.de/hyteg/hog@aee9c848e6500bbdecd135482f6ea0e3ef54c78b tomli >= 1.1.0 ; python_version < "3.11" clang-format diff --git a/operators.toml b/operators.toml index 806f89a228fb64cc1de7478b8ff617b4db4443d8..adaae642c0dd4f153448a51c6a5a19ce7e2a3958 100644 --- a/operators.toml +++ b/operators.toml @@ -388,3 +388,69 @@ quadrature = 3 blending = "IcosahedralShellMap" loop-strategy = "sawtooth" optimizations = ["moveconstants", "vectorize", "quadloops"] + +[[advection]] +trial-space = "P2" +test-space = "P2" +form-space-args.coefficient_function_space = "P2" +form-space-args.velocity_function_space = "P2" +dimensions = [2] +quadrature = 3 +loop-strategy = "sawtooth" +optimizations = ["moveconstants", "vectorize", "quadloops"] +blending = "IdentityMap" + +[[advection]] +trial-space = "P2" +test-space = "P2" +form-space-args.coefficient_function_space = "P2" +form-space-args.velocity_function_space = "P2" +dimensions = [2] +quadrature = 3 +loop-strategy = "sawtooth" +optimizations = ["moveconstants", "vectorize", "quadloops"] +blending = "AnnulusMap" + +[[supg_diffusion]] +trial-space = "P2" +test-space = "P2" +form-space-args.diffusivityXdelta_function_space = "P2" +form-space-args.velocity_function_space = "P2" +dimensions = [2] +quadrature = 3 +loop-strategy = "sawtooth" +optimizations = ["moveconstants", "vectorize", "quadloops"] +blending = "IdentityMap" + +[[supg_diffusion]] +trial-space = "P2" +test-space = "P2" +form-space-args.diffusivityXdelta_function_space = "P2" +form-space-args.velocity_function_space = "P2" +dimensions = [2] +quadrature = 3 +loop-strategy 
= "sawtooth" +optimizations = ["moveconstants", "vectorize", "quadloops"] +blending = "AnnulusMap" + +[[supg_advection]] +trial-space = "P2" +test-space = "P2" +form-space-args.coefficient_function_space = "P2" +form-space-args.velocity_function_space = "P2" +dimensions = [2] +quadrature = 3 +loop-strategy = "sawtooth" +optimizations = ["moveconstants", "vectorize", "quadloops"] +blending = "IdentityMap" + +[[supg_advection]] +trial-space = "P2" +test-space = "P2" +form-space-args.coefficient_function_space = "P2" +form-space-args.velocity_function_space = "P2" +dimensions = [2] +quadrature = 3 +loop-strategy = "sawtooth" +optimizations = ["moveconstants", "vectorize", "quadloops"] +blending = "AnnulusMap" diff --git a/operators/CMakeLists.txt b/operators/CMakeLists.txt index b8a33d29263fbe077fe4628a9ddde5760822171c..ceb4c8aaa70dcc44e6129a7dfed4b786f8cdf47c 100644 --- a/operators/CMakeLists.txt +++ b/operators/CMakeLists.txt @@ -7,6 +7,7 @@ endif() add_compile_options( "-Wno-unused-variable" ) +add_subdirectory(advection) add_subdirectory(curl_curl) add_subdirectory(diffusion) add_subdirectory(div_k_grad) @@ -18,3 +19,5 @@ add_subdirectory(gradient) add_subdirectory(k_mass) add_subdirectory(mass) add_subdirectory(shear_heating) +add_subdirectory(supg_advection) +add_subdirectory(supg_diffusion) diff --git a/operators/advection/CMakeLists.txt b/operators/advection/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..825f65cbc0b131209495b7914e44508bb89ffb5b --- /dev/null +++ b/operators/advection/CMakeLists.txt @@ -0,0 +1,52 @@ +add_library( opgen-advection + + P2ElementwiseAdvection.cpp + P2ElementwiseAdvection.hpp + P2ElementwiseAdvectionAnnulusMap.cpp + P2ElementwiseAdvectionAnnulusMap.hpp +) + +if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY) + target_sources(opgen-advection PRIVATE + + avx/P2ElementwiseAdvectionAnnulusMap_apply_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp + 
avx/P2ElementwiseAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp + avx/P2ElementwiseAdvection_apply_P2ElementwiseAdvection_macro_2D.cpp + avx/P2ElementwiseAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseAdvection_macro_2D.cpp + noarch/P2ElementwiseAdvectionAnnulusMap_toMatrix_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseAdvection_toMatrix_P2ElementwiseAdvection_macro_2D.cpp + ) + + set_source_files_properties( + + avx/P2ElementwiseAdvectionAnnulusMap_apply_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp + avx/P2ElementwiseAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp + avx/P2ElementwiseAdvection_apply_P2ElementwiseAdvection_macro_2D.cpp + avx/P2ElementwiseAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseAdvection_macro_2D.cpp + + PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS} + ) +else() + if(HYTEG_BUILD_WITH_AVX AND NOT WALBERLA_DOUBLE_ACCURACY) + message(WARNING "AVX vectorization only available in double precision. 
Using scalar kernels.") + endif() + + target_sources(opgen-advection PRIVATE + + noarch/P2ElementwiseAdvectionAnnulusMap_apply_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseAdvectionAnnulusMap_toMatrix_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseAdvection_apply_P2ElementwiseAdvection_macro_2D.cpp + noarch/P2ElementwiseAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseAdvection_macro_2D.cpp + noarch/P2ElementwiseAdvection_toMatrix_P2ElementwiseAdvection_macro_2D.cpp + ) +endif() + +if (HYTEG_BUILD_WITH_PETSC) + target_link_libraries(opgen-advection PUBLIC PETSc::PETSc) +endif () +if (WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT) + target_compile_features(opgen-advection PUBLIC cxx_std_23) +else () + target_compile_features(opgen-advection PUBLIC cxx_std_17) +endif () diff --git a/operators/advection/P2ElementwiseAdvection.cpp b/operators/advection/P2ElementwiseAdvection.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3b60e138507e06439202de824e3d38e9d99d63fa --- /dev/null +++ b/operators/advection/P2ElementwiseAdvection.cpp @@ -0,0 +1,391 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. 
+* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + +// Unfortunately, the inverse diagonal kernel wrapper triggers a GCC bug (maybe +// (related to) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107087) causing a +// warning in an internal standard library header (bits/stl_algobase.h). As a +// workaround, we disable the warning and include this header indirectly through +// a public header. +#include <waLBerlaDefinitions.h> +#ifdef WALBERLA_CXX_COMPILER_IS_GNU +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnonnull" +#endif +#include <cmath> +#ifdef WALBERLA_CXX_COMPILER_IS_GNU +#pragma GCC diagnostic pop +#endif + +#include "P2ElementwiseAdvection.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +P2ElementwiseAdvection::P2ElementwiseAdvection( const std::shared_ptr< PrimitiveStorage >& storage, + size_t minLevel, + size_t maxLevel, + const P2Function< real_t >& _cp, + const P2Function< real_t >& _ux, + const P2Function< real_t >& _uy ) +: Operator( storage, minLevel, maxLevel ) +, cp( _cp ) +, ux( _ux ) +, uy( _uy ) +{} + +void P2ElementwiseAdvection::apply( const P2Function< real_t >& src, + const P2Function< real_t >& dst, + uint_t level, + DoFType flag, + UpdateType updateType ) const +{ + this->startTiming( "apply" ); + + // Make sure that halos are up-to-date + this->timingTree_->start( "pre-communication" ); + if ( this->storage_->hasGlobalCells() ) + { + WALBERLA_ABORT( "Not implemented." 
); + } + else + { + communication::syncFunctionBetweenPrimitives( src, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( cp, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( ux, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( uy, level, communication::syncDirection_t::LOW2HIGH ); + } + this->timingTree_->stop( "pre-communication" ); + + if ( updateType == Replace ) + { + // We need to zero the destination array (including halos). + // However, we must not zero out anything that is not flagged with the specified BCs. + // Therefore, we first zero out everything that flagged, and then, later, + // the halos of the highest dim primitives. + dst.interpolate( walberla::numeric_cast< real_t >( 0 ), level, flag ); + } + + if ( storage_->hasGlobalCells() ) + { + WALBERLA_ABORT( "Not implemented." ); + } + else + { + for ( auto& it : storage_->getFaces() ) + { + Face& face = *it.second; + + // get hold of the actual numerical data in the functions + real_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_srcEdge = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_dstEdge = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_cpVertex = face.getData( cp.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_cpEdge = face.getData( cp.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uxVertex = face.getData( ux.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uxEdge = face.getData( ux.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uyVertex = face.getData( 
uy.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uyEdge = face.getData( uy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + + // Zero out dst halos only + // + // This is also necessary when using update type == Add. + // During additive comm we then skip zeroing the data on the lower-dim primitives. + for ( const auto& idx : vertexdof::macroface::Iterator( level ) ) + { + if ( vertexdof::macroface::isVertexOnBoundary( level, idx ) ) + { + auto arrayIdx = vertexdof::macroface::index( level, idx.x(), idx.y() ); + _data_dstVertex[arrayIdx] = walberla::numeric_cast< real_t >( 0 ); + } + } + for ( const auto& idx : edgedof::macroface::Iterator( level ) ) + { + for ( const auto& orientation : edgedof::faceLocalEdgeDoFOrientations ) + { + if ( !edgedof::macroface::isInnerEdgeDoF( level, idx, orientation ) ) + { + auto arrayIdx = edgedof::macroface::index( level, idx.x(), idx.y(), orientation ); + _data_dstEdge[arrayIdx] = walberla::numeric_cast< real_t >( 0 ); + } + } + } + + const auto micro_edges_per_macro_edge = (int64_t) levelinfo::num_microedges_per_edge( level ); + const auto micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level ); + const real_t macro_vertex_coord_id_0comp0 = (real_t) face.getCoordinates()[0][0]; + const real_t macro_vertex_coord_id_0comp1 = (real_t) face.getCoordinates()[0][1]; + const real_t macro_vertex_coord_id_1comp0 = (real_t) face.getCoordinates()[1][0]; + const real_t macro_vertex_coord_id_1comp1 = (real_t) face.getCoordinates()[1][1]; + const real_t macro_vertex_coord_id_2comp0 = (real_t) face.getCoordinates()[2][0]; + const real_t macro_vertex_coord_id_2comp1 = (real_t) face.getCoordinates()[2][1]; + + this->timingTree_->start( "kernel" ); + + apply_P2ElementwiseAdvection_macro_2D( + + _data_cpEdge, + _data_cpVertex, + _data_dstEdge, + _data_dstVertex, + _data_srcEdge, + _data_srcVertex, + _data_uxEdge, + _data_uxVertex, + _data_uyEdge, + _data_uyVertex, + 
macro_vertex_coord_id_0comp0, + macro_vertex_coord_id_0comp1, + macro_vertex_coord_id_1comp0, + macro_vertex_coord_id_1comp1, + macro_vertex_coord_id_2comp0, + macro_vertex_coord_id_2comp1, + micro_edges_per_macro_edge, + micro_edges_per_macro_edge_float ); + + this->timingTree_->stop( "kernel" ); + } + + // Push result to lower-dimensional primitives + // + this->timingTree_->start( "post-communication" ); + // Note: We could avoid communication here by implementing the apply() also for the respective + // lower dimensional primitives! + dst.getVertexDoFFunction().communicateAdditively< Face, Edge >( + level, DoFType::All ^ flag, *storage_, updateType == Replace ); + dst.getVertexDoFFunction().communicateAdditively< Face, Vertex >( + level, DoFType::All ^ flag, *storage_, updateType == Replace ); + dst.getEdgeDoFFunction().communicateAdditively< Face, Edge >( + level, DoFType::All ^ flag, *storage_, updateType == Replace ); + this->timingTree_->stop( "post-communication" ); + } + + this->stopTiming( "apply" ); +} +void P2ElementwiseAdvection::toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat, + const P2Function< idx_t >& src, + const P2Function< idx_t >& dst, + uint_t level, + DoFType flag ) const +{ + this->startTiming( "toMatrix" ); + + // We currently ignore the flag provided! + if ( flag != All ) + { + WALBERLA_LOG_WARNING_ON_ROOT( "Input flag ignored in toMatrix; using flag = All" ); + } + + if ( storage_->hasGlobalCells() ) + { + this->timingTree_->start( "pre-communication" ); + cp.communicate< Face, Cell >( level ); + cp.communicate< Edge, Cell >( level ); + cp.communicate< Vertex, Cell >( level ); + ux.communicate< Face, Cell >( level ); + ux.communicate< Edge, Cell >( level ); + ux.communicate< Vertex, Cell >( level ); + uy.communicate< Face, Cell >( level ); + uy.communicate< Edge, Cell >( level ); + uy.communicate< Vertex, Cell >( level ); + this->timingTree_->stop( "pre-communication" ); + + WALBERLA_ABORT( "Not implemented." 
); + } + else + { + this->timingTree_->start( "pre-communication" ); + communication::syncFunctionBetweenPrimitives( cp, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( ux, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( uy, level, communication::syncDirection_t::LOW2HIGH ); + this->timingTree_->stop( "pre-communication" ); + + for ( auto& it : storage_->getFaces() ) + { + Face& face = *it.second; + + // get hold of the actual numerical data + idx_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + idx_t* _data_srcEdge = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + idx_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + idx_t* _data_dstEdge = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_cpVertex = face.getData( cp.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_cpEdge = face.getData( cp.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uxVertex = face.getData( ux.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uxEdge = face.getData( ux.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uyVertex = face.getData( uy.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uyEdge = face.getData( uy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + + const auto micro_edges_per_macro_edge = (int64_t) levelinfo::num_microedges_per_edge( level ); + const auto micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level ); + const real_t macro_vertex_coord_id_0comp0 = (real_t) face.getCoordinates()[0][0]; + const real_t macro_vertex_coord_id_0comp1 = (real_t) face.getCoordinates()[0][1]; + const real_t 
macro_vertex_coord_id_1comp0 = (real_t) face.getCoordinates()[1][0]; + const real_t macro_vertex_coord_id_1comp1 = (real_t) face.getCoordinates()[1][1]; + const real_t macro_vertex_coord_id_2comp0 = (real_t) face.getCoordinates()[2][0]; + const real_t macro_vertex_coord_id_2comp1 = (real_t) face.getCoordinates()[2][1]; + + this->timingTree_->start( "kernel" ); + + toMatrix_P2ElementwiseAdvection_macro_2D( + + _data_cpEdge, + _data_cpVertex, + _data_dstEdge, + _data_dstVertex, + _data_srcEdge, + _data_srcVertex, + _data_uxEdge, + _data_uxVertex, + _data_uyEdge, + _data_uyVertex, + macro_vertex_coord_id_0comp0, + macro_vertex_coord_id_0comp1, + macro_vertex_coord_id_1comp0, + macro_vertex_coord_id_1comp1, + macro_vertex_coord_id_2comp0, + macro_vertex_coord_id_2comp1, + mat, + micro_edges_per_macro_edge, + micro_edges_per_macro_edge_float ); + + this->timingTree_->stop( "kernel" ); + } + } + this->stopTiming( "toMatrix" ); +} +void P2ElementwiseAdvection::computeInverseDiagonalOperatorValues() +{ + this->startTiming( "computeInverseDiagonalOperatorValues" ); + + if ( invDiag_ == nullptr ) + { + invDiag_ = std::make_shared< P2Function< real_t > >( "inverse diagonal entries", storage_, minLevel_, maxLevel_ ); + } + + for ( uint_t level = minLevel_; level <= maxLevel_; level++ ) + { + invDiag_->setToZero( level ); + + if ( storage_->hasGlobalCells() ) + { + this->timingTree_->start( "pre-communication" ); + cp.communicate< Face, Cell >( level ); + cp.communicate< Edge, Cell >( level ); + cp.communicate< Vertex, Cell >( level ); + ux.communicate< Face, Cell >( level ); + ux.communicate< Edge, Cell >( level ); + ux.communicate< Vertex, Cell >( level ); + uy.communicate< Face, Cell >( level ); + uy.communicate< Edge, Cell >( level ); + uy.communicate< Vertex, Cell >( level ); + this->timingTree_->stop( "pre-communication" ); + + WALBERLA_ABORT( "Not implemented." 
); + ( *invDiag_ ).invertElementwise( level ); + } + else + { + this->timingTree_->start( "pre-communication" ); + communication::syncFunctionBetweenPrimitives( cp, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( ux, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( uy, level, communication::syncDirection_t::LOW2HIGH ); + this->timingTree_->stop( "pre-communication" ); + + for ( auto& it : storage_->getFaces() ) + { + Face& face = *it.second; + + // get hold of the actual numerical data + real_t* _data_invDiag_Vertex = + face.getData( ( *invDiag_ ).getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_invDiag_Edge = face.getData( ( *invDiag_ ).getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_cpVertex = face.getData( cp.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_cpEdge = face.getData( cp.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uxVertex = face.getData( ux.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uxEdge = face.getData( ux.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uyVertex = face.getData( uy.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uyEdge = face.getData( uy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + + const auto micro_edges_per_macro_edge = (int64_t) levelinfo::num_microedges_per_edge( level ); + const auto micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level ); + const real_t macro_vertex_coord_id_0comp0 = (real_t) face.getCoordinates()[0][0]; + const real_t macro_vertex_coord_id_0comp1 = (real_t) face.getCoordinates()[0][1]; + const real_t macro_vertex_coord_id_1comp0 = (real_t) face.getCoordinates()[1][0]; + const real_t macro_vertex_coord_id_1comp1 = (real_t) 
face.getCoordinates()[1][1]; + const real_t macro_vertex_coord_id_2comp0 = (real_t) face.getCoordinates()[2][0]; + const real_t macro_vertex_coord_id_2comp1 = (real_t) face.getCoordinates()[2][1]; + + this->timingTree_->start( "kernel" ); + + computeInverseDiagonalOperatorValues_P2ElementwiseAdvection_macro_2D( + + _data_cpEdge, + _data_cpVertex, + _data_invDiag_Edge, + _data_invDiag_Vertex, + _data_uxEdge, + _data_uxVertex, + _data_uyEdge, + _data_uyVertex, + macro_vertex_coord_id_0comp0, + macro_vertex_coord_id_0comp1, + macro_vertex_coord_id_1comp0, + macro_vertex_coord_id_1comp1, + macro_vertex_coord_id_2comp0, + macro_vertex_coord_id_2comp1, + micro_edges_per_macro_edge, + micro_edges_per_macro_edge_float ); + + this->timingTree_->stop( "kernel" ); + } + + // Push result to lower-dimensional primitives + // + this->timingTree_->start( "post-communication" ); + // Note: We could avoid communication here by implementing the apply() also for the respective + // lower dimensional primitives! + ( *invDiag_ ).getVertexDoFFunction().communicateAdditively< Face, Edge >( level ); + ( *invDiag_ ).getVertexDoFFunction().communicateAdditively< Face, Vertex >( level ); + ( *invDiag_ ).getEdgeDoFFunction().communicateAdditively< Face, Edge >( level ); + this->timingTree_->stop( "post-communication" ); + ( *invDiag_ ).invertElementwise( level ); + } + } + + this->stopTiming( "computeInverseDiagonalOperatorValues" ); +} +std::shared_ptr< P2Function< real_t > > P2ElementwiseAdvection::getInverseDiagonalValues() const +{ + return invDiag_; +} + +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/advection/P2ElementwiseAdvection.hpp b/operators/advection/P2ElementwiseAdvection.hpp new file mode 100644 index 0000000000000000000000000000000000000000..88d58e6a7afa3ccf0fa354bca06954bdcf8a5b47 --- /dev/null +++ b/operators/advection/P2ElementwiseAdvection.hpp @@ -0,0 +1,183 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. 
+* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + +#pragma once + +#include "core/DataTypes.h" + +#include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" +#include "hyteg/communication/Syncing.hpp" +#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" +#include "hyteg/operators/Operator.hpp" +#include "hyteg/p2functionspace/P2Function.hpp" +#include "hyteg/primitivestorage/PrimitiveStorage.hpp" +#include "hyteg/solvers/Smoothables.hpp" +#include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +/// advection operator which needs to be used in combination with SUPG +/// +/// Geometry map: IdentityMap +/// +/// Weak formulation +/// +/// T: trial function (scalar space: Lagrange, degree: 2) +/// s: test function (scalar space: Lagrange, degree: 2) +/// u: velocity function (vectorial space: Lagrange, degree: 2) +/// +/// ∫ cp ( u · ∇T ) s + +class P2ElementwiseAdvection : public Operator< P2Function< real_t >, P2Function< real_t > >, + public OperatorWithInverseDiagonal< P2Function< real_t > > +{ + public: 
+ P2ElementwiseAdvection( const std::shared_ptr< PrimitiveStorage >& storage, + size_t minLevel, + size_t maxLevel, + const P2Function< real_t >& _cp, + const P2Function< real_t >& _ux, + const P2Function< real_t >& _uy ); + + void apply( const P2Function< real_t >& src, + const P2Function< real_t >& dst, + uint_t level, + DoFType flag, + UpdateType updateType = Replace ) const; + + void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat, + const P2Function< idx_t >& src, + const P2Function< idx_t >& dst, + uint_t level, + DoFType flag ) const; + + void computeInverseDiagonalOperatorValues(); + + std::shared_ptr< P2Function< real_t > > getInverseDiagonalValues() const; + + protected: + private: + /// Integral: P2ElementwiseAdvection + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH + /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: IdentityMap + /// - operations per element: + /// adds muls divs pows abs assignments function_calls unknown_ops + /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- + /// 404 472 12 0 0 0 0 1 + void apply_P2ElementwiseAdvection_macro_2D( real_t* RESTRICT _data_cpEdge, + real_t* RESTRICT _data_cpVertex, + real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseAdvection + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 
2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH + /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: IdentityMap + /// - operations per element: + /// adds muls divs pows abs assignments function_calls unknown_ops + /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- + /// 368 436 12 0 0 0 0 4 + void toMatrix_P2ElementwiseAdvection_macro_2D( real_t* RESTRICT _data_cpEdge, + real_t* RESTRICT _data_cpVertex, + idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseAdvection + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH + /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: IdentityMap + /// - operations per element: + /// adds muls divs pows abs assignments function_calls unknown_ops + /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- + /// 254 316 12 0 0 0 0 1 + void computeInverseDiagonalOperatorValues_P2ElementwiseAdvection_macro_2D( real_t* RESTRICT _data_cpEdge, + real_t* RESTRICT _data_cpVertex, + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t 
macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + std::shared_ptr< P2Function< real_t > > invDiag_; + P2Function< real_t > cp; + P2Function< real_t > ux; + P2Function< real_t > uy; +}; + +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/advection/P2ElementwiseAdvectionAnnulusMap.cpp b/operators/advection/P2ElementwiseAdvectionAnnulusMap.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c88e9c6b6e7f118575ffcc0220e2f1899bf25528 --- /dev/null +++ b/operators/advection/P2ElementwiseAdvectionAnnulusMap.cpp @@ -0,0 +1,448 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. 
+*/ + +// Unfortunately, the inverse diagonal kernel wrapper triggers a GCC bug (maybe +// (related to) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107087) causing a +// warning in an internal standard library header (bits/stl_algobase.h). As a +// workaround, we disable the warning and include this header indirectly through +// a public header. +#include <waLBerlaDefinitions.h> +#ifdef WALBERLA_CXX_COMPILER_IS_GNU +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnonnull" +#endif +#include <cmath> +#ifdef WALBERLA_CXX_COMPILER_IS_GNU +#pragma GCC diagnostic pop +#endif + +#include "P2ElementwiseAdvectionAnnulusMap.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +P2ElementwiseAdvectionAnnulusMap::P2ElementwiseAdvectionAnnulusMap( const std::shared_ptr< PrimitiveStorage >& storage, + size_t minLevel, + size_t maxLevel, + const P2Function< real_t >& _cp, + const P2Function< real_t >& _ux, + const P2Function< real_t >& _uy ) +: Operator( storage, minLevel, maxLevel ) +, cp( _cp ) +, ux( _ux ) +, uy( _uy ) +{} + +void P2ElementwiseAdvectionAnnulusMap::apply( const P2Function< real_t >& src, + const P2Function< real_t >& dst, + uint_t level, + DoFType flag, + UpdateType updateType ) const +{ + this->startTiming( "apply" ); + + // Make sure that halos are up-to-date + this->timingTree_->start( "pre-communication" ); + if ( this->storage_->hasGlobalCells() ) + { + WALBERLA_ABORT( "Not implemented." 
); + } + else + { + communication::syncFunctionBetweenPrimitives( src, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( cp, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( ux, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( uy, level, communication::syncDirection_t::LOW2HIGH ); + } + this->timingTree_->stop( "pre-communication" ); + + if ( updateType == Replace ) + { + // We need to zero the destination array (including halos). + // However, we must not zero out anything that is not flagged with the specified BCs. + // Therefore, we first zero out everything that flagged, and then, later, + // the halos of the highest dim primitives. + dst.interpolate( walberla::numeric_cast< real_t >( 0 ), level, flag ); + } + + if ( storage_->hasGlobalCells() ) + { + WALBERLA_ABORT( "Not implemented." ); + } + else + { + for ( auto& it : storage_->getFaces() ) + { + Face& face = *it.second; + + // get hold of the actual numerical data in the functions + real_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_srcEdge = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_dstEdge = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_cpVertex = face.getData( cp.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_cpEdge = face.getData( cp.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uxVertex = face.getData( ux.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uxEdge = face.getData( ux.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uyVertex = face.getData( 
uy.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uyEdge = face.getData( uy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + + // Zero out dst halos only + // + // This is also necessary when using update type == Add. + // During additive comm we then skip zeroing the data on the lower-dim primitives. + for ( const auto& idx : vertexdof::macroface::Iterator( level ) ) + { + if ( vertexdof::macroface::isVertexOnBoundary( level, idx ) ) + { + auto arrayIdx = vertexdof::macroface::index( level, idx.x(), idx.y() ); + _data_dstVertex[arrayIdx] = walberla::numeric_cast< real_t >( 0 ); + } + } + for ( const auto& idx : edgedof::macroface::Iterator( level ) ) + { + for ( const auto& orientation : edgedof::faceLocalEdgeDoFOrientations ) + { + if ( !edgedof::macroface::isInnerEdgeDoF( level, idx, orientation ) ) + { + auto arrayIdx = edgedof::macroface::index( level, idx.x(), idx.y(), orientation ); + _data_dstEdge[arrayIdx] = walberla::numeric_cast< real_t >( 0 ); + } + } + } + + const auto micro_edges_per_macro_edge = (int64_t) levelinfo::num_microedges_per_edge( level ); + const auto micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level ); + const real_t macro_vertex_coord_id_0comp0 = (real_t) face.getCoordinates()[0][0]; + const real_t macro_vertex_coord_id_0comp1 = (real_t) face.getCoordinates()[0][1]; + const real_t macro_vertex_coord_id_1comp0 = (real_t) face.getCoordinates()[1][0]; + const real_t macro_vertex_coord_id_1comp1 = (real_t) face.getCoordinates()[1][1]; + const real_t macro_vertex_coord_id_2comp0 = (real_t) face.getCoordinates()[2][0]; + const real_t macro_vertex_coord_id_2comp1 = (real_t) face.getCoordinates()[2][1]; + WALBERLA_CHECK_NOT_NULLPTR( + std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() ), + "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." 
) + real_t radRefVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRefVertex(); + real_t radRayVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRayVertex(); + real_t refVertex_0 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[0]; + real_t rayVertex_0 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[0]; + real_t thrVertex_0 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[0]; + real_t refVertex_1 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[1]; + real_t rayVertex_1 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[1]; + real_t thrVertex_1 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[1]; + + this->timingTree_->start( "kernel" ); + + apply_P2ElementwiseAdvectionAnnulusMap_macro_2D( + + _data_cpEdge, + _data_cpVertex, + _data_dstEdge, + _data_dstVertex, + _data_srcEdge, + _data_srcVertex, + _data_uxEdge, + _data_uxVertex, + _data_uyEdge, + _data_uyVertex, + macro_vertex_coord_id_0comp0, + macro_vertex_coord_id_0comp1, + macro_vertex_coord_id_1comp0, + macro_vertex_coord_id_1comp1, + macro_vertex_coord_id_2comp0, + macro_vertex_coord_id_2comp1, + micro_edges_per_macro_edge, + micro_edges_per_macro_edge_float, + radRayVertex, + radRefVertex, + rayVertex_0, + rayVertex_1, + refVertex_0, + refVertex_1, + thrVertex_0, + thrVertex_1 ); + + this->timingTree_->stop( "kernel" ); + } + + // Push result to lower-dimensional primitives + // + this->timingTree_->start( "post-communication" ); + // Note: We could avoid communication here by implementing the apply() also for the respective + // lower dimensional primitives! 
+ dst.getVertexDoFFunction().communicateAdditively< Face, Edge >( + level, DoFType::All ^ flag, *storage_, updateType == Replace ); + dst.getVertexDoFFunction().communicateAdditively< Face, Vertex >( + level, DoFType::All ^ flag, *storage_, updateType == Replace ); + dst.getEdgeDoFFunction().communicateAdditively< Face, Edge >( + level, DoFType::All ^ flag, *storage_, updateType == Replace ); + this->timingTree_->stop( "post-communication" ); + } + + this->stopTiming( "apply" ); +} +void P2ElementwiseAdvectionAnnulusMap::toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat, + const P2Function< idx_t >& src, + const P2Function< idx_t >& dst, + uint_t level, + DoFType flag ) const +{ + this->startTiming( "toMatrix" ); + + // We currently ignore the flag provided! + if ( flag != All ) + { + WALBERLA_LOG_WARNING_ON_ROOT( "Input flag ignored in toMatrix; using flag = All" ); + } + + if ( storage_->hasGlobalCells() ) + { + this->timingTree_->start( "pre-communication" ); + cp.communicate< Face, Cell >( level ); + cp.communicate< Edge, Cell >( level ); + cp.communicate< Vertex, Cell >( level ); + ux.communicate< Face, Cell >( level ); + ux.communicate< Edge, Cell >( level ); + ux.communicate< Vertex, Cell >( level ); + uy.communicate< Face, Cell >( level ); + uy.communicate< Edge, Cell >( level ); + uy.communicate< Vertex, Cell >( level ); + this->timingTree_->stop( "pre-communication" ); + + WALBERLA_ABORT( "Not implemented." 
); + } + else + { + this->timingTree_->start( "pre-communication" ); + communication::syncFunctionBetweenPrimitives( cp, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( ux, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( uy, level, communication::syncDirection_t::LOW2HIGH ); + this->timingTree_->stop( "pre-communication" ); + + for ( auto& it : storage_->getFaces() ) + { + Face& face = *it.second; + + // get hold of the actual numerical data + idx_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + idx_t* _data_srcEdge = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + idx_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + idx_t* _data_dstEdge = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_cpVertex = face.getData( cp.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_cpEdge = face.getData( cp.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uxVertex = face.getData( ux.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uxEdge = face.getData( ux.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uyVertex = face.getData( uy.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uyEdge = face.getData( uy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + + const auto micro_edges_per_macro_edge = (int64_t) levelinfo::num_microedges_per_edge( level ); + const auto micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level ); + const real_t macro_vertex_coord_id_0comp0 = (real_t) face.getCoordinates()[0][0]; + const real_t macro_vertex_coord_id_0comp1 = (real_t) face.getCoordinates()[0][1]; + const real_t 
macro_vertex_coord_id_1comp0 = (real_t) face.getCoordinates()[1][0]; + const real_t macro_vertex_coord_id_1comp1 = (real_t) face.getCoordinates()[1][1]; + const real_t macro_vertex_coord_id_2comp0 = (real_t) face.getCoordinates()[2][0]; + const real_t macro_vertex_coord_id_2comp1 = (real_t) face.getCoordinates()[2][1]; + WALBERLA_CHECK_NOT_NULLPTR( + std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() ), + "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." ) + real_t radRefVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRefVertex(); + real_t radRayVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRayVertex(); + real_t refVertex_0 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[0]; + real_t rayVertex_0 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[0]; + real_t thrVertex_0 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[0]; + real_t refVertex_1 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[1]; + real_t rayVertex_1 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[1]; + real_t thrVertex_1 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[1]; + + this->timingTree_->start( "kernel" ); + + toMatrix_P2ElementwiseAdvectionAnnulusMap_macro_2D( + + _data_cpEdge, + _data_cpVertex, + _data_dstEdge, + _data_dstVertex, + _data_srcEdge, + _data_srcVertex, + _data_uxEdge, + _data_uxVertex, + _data_uyEdge, + _data_uyVertex, + macro_vertex_coord_id_0comp0, + macro_vertex_coord_id_0comp1, + macro_vertex_coord_id_1comp0, + macro_vertex_coord_id_1comp1, + macro_vertex_coord_id_2comp0, + macro_vertex_coord_id_2comp1, + mat, + micro_edges_per_macro_edge, + micro_edges_per_macro_edge_float, + radRayVertex, + radRefVertex, + rayVertex_0, + rayVertex_1, + refVertex_0, + refVertex_1, + 
thrVertex_0, + thrVertex_1 ); + + this->timingTree_->stop( "kernel" ); + } + } + this->stopTiming( "toMatrix" ); +} +void P2ElementwiseAdvectionAnnulusMap::computeInverseDiagonalOperatorValues() +{ + this->startTiming( "computeInverseDiagonalOperatorValues" ); + + if ( invDiag_ == nullptr ) + { + invDiag_ = std::make_shared< P2Function< real_t > >( "inverse diagonal entries", storage_, minLevel_, maxLevel_ ); + } + + for ( uint_t level = minLevel_; level <= maxLevel_; level++ ) + { + invDiag_->setToZero( level ); + + if ( storage_->hasGlobalCells() ) + { + this->timingTree_->start( "pre-communication" ); + cp.communicate< Face, Cell >( level ); + cp.communicate< Edge, Cell >( level ); + cp.communicate< Vertex, Cell >( level ); + ux.communicate< Face, Cell >( level ); + ux.communicate< Edge, Cell >( level ); + ux.communicate< Vertex, Cell >( level ); + uy.communicate< Face, Cell >( level ); + uy.communicate< Edge, Cell >( level ); + uy.communicate< Vertex, Cell >( level ); + this->timingTree_->stop( "pre-communication" ); + + WALBERLA_ABORT( "Not implemented." 
); + ( *invDiag_ ).invertElementwise( level ); + } + else + { + this->timingTree_->start( "pre-communication" ); + communication::syncFunctionBetweenPrimitives( cp, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( ux, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( uy, level, communication::syncDirection_t::LOW2HIGH ); + this->timingTree_->stop( "pre-communication" ); + + for ( auto& it : storage_->getFaces() ) + { + Face& face = *it.second; + + // get hold of the actual numerical data + real_t* _data_invDiag_Vertex = + face.getData( ( *invDiag_ ).getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_invDiag_Edge = face.getData( ( *invDiag_ ).getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_cpVertex = face.getData( cp.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_cpEdge = face.getData( cp.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uxVertex = face.getData( ux.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uxEdge = face.getData( ux.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uyVertex = face.getData( uy.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uyEdge = face.getData( uy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + + const auto micro_edges_per_macro_edge = (int64_t) levelinfo::num_microedges_per_edge( level ); + const auto micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level ); + const real_t macro_vertex_coord_id_0comp0 = (real_t) face.getCoordinates()[0][0]; + const real_t macro_vertex_coord_id_0comp1 = (real_t) face.getCoordinates()[0][1]; + const real_t macro_vertex_coord_id_1comp0 = (real_t) face.getCoordinates()[1][0]; + const real_t macro_vertex_coord_id_1comp1 = (real_t) 
face.getCoordinates()[1][1]; + const real_t macro_vertex_coord_id_2comp0 = (real_t) face.getCoordinates()[2][0]; + const real_t macro_vertex_coord_id_2comp1 = (real_t) face.getCoordinates()[2][1]; + WALBERLA_CHECK_NOT_NULLPTR( + std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() ), + "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." ) + real_t radRefVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRefVertex(); + real_t radRayVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRayVertex(); + real_t refVertex_0 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[0]; + real_t rayVertex_0 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[0]; + real_t thrVertex_0 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[0]; + real_t refVertex_1 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[1]; + real_t rayVertex_1 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[1]; + real_t thrVertex_1 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[1]; + + this->timingTree_->start( "kernel" ); + + computeInverseDiagonalOperatorValues_P2ElementwiseAdvectionAnnulusMap_macro_2D( + + _data_cpEdge, + _data_cpVertex, + _data_invDiag_Edge, + _data_invDiag_Vertex, + _data_uxEdge, + _data_uxVertex, + _data_uyEdge, + _data_uyVertex, + macro_vertex_coord_id_0comp0, + macro_vertex_coord_id_0comp1, + macro_vertex_coord_id_1comp0, + macro_vertex_coord_id_1comp1, + macro_vertex_coord_id_2comp0, + macro_vertex_coord_id_2comp1, + micro_edges_per_macro_edge, + micro_edges_per_macro_edge_float, + radRayVertex, + radRefVertex, + rayVertex_0, + rayVertex_1, + refVertex_0, + refVertex_1, + thrVertex_0, + thrVertex_1 ); + + this->timingTree_->stop( "kernel" ); + } + + // Push result to lower-dimensional primitives + // + 
this->timingTree_->start( "post-communication" ); + // Note: We could avoid communication here by implementing the apply() also for the respective + // lower dimensional primitives! + ( *invDiag_ ).getVertexDoFFunction().communicateAdditively< Face, Edge >( level ); + ( *invDiag_ ).getVertexDoFFunction().communicateAdditively< Face, Vertex >( level ); + ( *invDiag_ ).getEdgeDoFFunction().communicateAdditively< Face, Edge >( level ); + this->timingTree_->stop( "post-communication" ); + ( *invDiag_ ).invertElementwise( level ); + } + } + + this->stopTiming( "computeInverseDiagonalOperatorValues" ); +} +std::shared_ptr< P2Function< real_t > > P2ElementwiseAdvectionAnnulusMap::getInverseDiagonalValues() const +{ + return invDiag_; +} + +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/advection/P2ElementwiseAdvectionAnnulusMap.hpp b/operators/advection/P2ElementwiseAdvectionAnnulusMap.hpp new file mode 100644 index 0000000000000000000000000000000000000000..f1f9b111c6b57030fe580644ac9727c00f1f1a12 --- /dev/null +++ b/operators/advection/P2ElementwiseAdvectionAnnulusMap.hpp @@ -0,0 +1,208 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. 
/*
* The entire file was generated with the HyTeG Operator Generator.
*
* Avoid modifying this file. If buggy, consider fixing the generator itself.
*/

#pragma once

#include "core/DataTypes.h"

#include "hyteg/LikwidWrapper.hpp"
#include "hyteg/boundary/BoundaryConditions.hpp"
#include "hyteg/communication/Syncing.hpp"
#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
#include "hyteg/geometry/AnnulusMap.hpp"
#include "hyteg/operators/Operator.hpp"
#include "hyteg/p2functionspace/P2Function.hpp"
#include "hyteg/primitivestorage/PrimitiveStorage.hpp"
#include "hyteg/solvers/Smoothables.hpp"
#include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
#include "hyteg/types/types.hpp"

#define FUNC_PREFIX

namespace hyteg {

namespace operatorgeneration {

/// advection operator which needs to be used in combination with SUPG
///
/// Geometry map: AnnulusMap
///
/// Weak formulation
///
///     T: trial function (scalar space:    Lagrange, degree: 2)
///     s: test function  (scalar space:    Lagrange, degree: 2)
///     u: velocity function (vectorial space: Lagrange, degree: 2)
///
///     ∫ cp ( u · ∇T ) s
///
/// The operator is 2D only: the public drivers abort on storages with global cells, and only
/// `*_macro_2D` kernels are declared below.

class P2ElementwiseAdvectionAnnulusMap : public Operator< P2Function< real_t >, P2Function< real_t > >,
                                         public OperatorWithInverseDiagonal< P2Function< real_t > >
{
 public:
   /// \param storage   primitive storage the operator works on
   /// \param minLevel  coarsest refinement level handled by the operator
   /// \param maxLevel  finest refinement level handled by the operator
   /// \param _cp       coefficient function cp of the weak form (copied into the operator)
   /// \param _ux       first velocity function (copied); presumably the x-component of u
   /// \param _uy       second velocity function (copied); presumably the y-component of u
   P2ElementwiseAdvectionAnnulusMap( const std::shared_ptr< PrimitiveStorage >& storage,
                                     size_t                                     minLevel,
                                     size_t                                     maxLevel,
                                     const P2Function< real_t >&                _cp,
                                     const P2Function< real_t >&                _ux,
                                     const P2Function< real_t >&                _uy );

   /// Applies the operator to src and writes (Replace) or accumulates the result into dst on
   /// the given level, restricted to DoFs matching flag.
   void apply( const P2Function< real_t >& src,
               const P2Function< real_t >& dst,
               uint_t                      level,
               DoFType                     flag,
               UpdateType                  updateType = Replace ) const;

   /// Assembles the operator into mat using the index functions src and dst.
   /// Note: the implementation ignores flag and logs a warning if it is not All.
   void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
                  const P2Function< idx_t >&                  src,
                  const P2Function< idx_t >&                  dst,
                  uint_t                                      level,
                  DoFType                                     flag ) const;

   /// Computes the inverse diagonal on all levels and caches it in invDiag_.
   void computeInverseDiagonalOperatorValues();

   /// Returns the cached inverse diagonal (nullptr before the first computation).
   std::shared_ptr< P2Function< real_t > > getInverseDiagonalValues() const;

 protected:
 private:
   /// Integral: P2ElementwiseAdvectionAnnulusMap
   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
   /// - kernel type:     apply
   /// - loop strategy:   SAWTOOTH
   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
   /// - blending map:    AnnulusMap
   /// - operations per element:
   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
   ///    640     892      20      12      0              0                 0              1
   void apply_P2ElementwiseAdvectionAnnulusMap_macro_2D( real_t* RESTRICT _data_cpEdge,
                                                         real_t* RESTRICT _data_cpVertex,
                                                         real_t* RESTRICT _data_dstEdge,
                                                         real_t* RESTRICT _data_dstVertex,
                                                         real_t* RESTRICT _data_srcEdge,
                                                         real_t* RESTRICT _data_srcVertex,
                                                         real_t* RESTRICT _data_uxEdge,
                                                         real_t* RESTRICT _data_uxVertex,
                                                         real_t* RESTRICT _data_uyEdge,
                                                         real_t* RESTRICT _data_uyVertex,
                                                         real_t           macro_vertex_coord_id_0comp0,
                                                         real_t           macro_vertex_coord_id_0comp1,
                                                         real_t           macro_vertex_coord_id_1comp0,
                                                         real_t           macro_vertex_coord_id_1comp1,
                                                         real_t           macro_vertex_coord_id_2comp0,
                                                         real_t           macro_vertex_coord_id_2comp1,
                                                         int64_t          micro_edges_per_macro_edge,
                                                         real_t           micro_edges_per_macro_edge_float,
                                                         real_t           radRayVertex,
                                                         real_t           radRefVertex,
                                                         real_t           rayVertex_0,
                                                         real_t           rayVertex_1,
                                                         real_t           refVertex_0,
                                                         real_t           refVertex_1,
                                                         real_t           thrVertex_0,
                                                         real_t           thrVertex_1 ) const;

   /// Integral: P2ElementwiseAdvectionAnnulusMap
   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
   /// - kernel type:     toMatrix
   /// - loop strategy:   SAWTOOTH
   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
   /// - blending map:    AnnulusMap
   /// - operations per element:
   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
   ///    604     856      20      12      0              0                 0              4
   void toMatrix_P2ElementwiseAdvectionAnnulusMap_macro_2D( real_t* RESTRICT                    _data_cpEdge,
                                                            real_t* RESTRICT                    _data_cpVertex,
                                                            idx_t* RESTRICT                     _data_dstEdge,
                                                            idx_t* RESTRICT                     _data_dstVertex,
                                                            idx_t* RESTRICT                     _data_srcEdge,
                                                            idx_t* RESTRICT                     _data_srcVertex,
                                                            real_t* RESTRICT                    _data_uxEdge,
                                                            real_t* RESTRICT                    _data_uxVertex,
                                                            real_t* RESTRICT                    _data_uyEdge,
                                                            real_t* RESTRICT                    _data_uyVertex,
                                                            real_t                              macro_vertex_coord_id_0comp0,
                                                            real_t                              macro_vertex_coord_id_0comp1,
                                                            real_t                              macro_vertex_coord_id_1comp0,
                                                            real_t                              macro_vertex_coord_id_1comp1,
                                                            real_t                              macro_vertex_coord_id_2comp0,
                                                            real_t                              macro_vertex_coord_id_2comp1,
                                                            std::shared_ptr< SparseMatrixProxy > mat,
                                                            int64_t                             micro_edges_per_macro_edge,
                                                            real_t                              micro_edges_per_macro_edge_float,
                                                            real_t                              radRayVertex,
                                                            real_t                              radRefVertex,
                                                            real_t                              rayVertex_0,
                                                            real_t                              rayVertex_1,
                                                            real_t                              refVertex_0,
                                                            real_t                              refVertex_1,
                                                            real_t                              thrVertex_0,
                                                            real_t                              thrVertex_1 ) const;

   /// Integral: P2ElementwiseAdvectionAnnulusMap
   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
   /// - kernel type:     computeInverseDiagonalOperatorValues
   /// - loop strategy:   SAWTOOTH
   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
   /// - blending map:    AnnulusMap
   /// - operations per element:
   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
   ///    490     736      20      12      0              0                 0              1
   void computeInverseDiagonalOperatorValues_P2ElementwiseAdvectionAnnulusMap_macro_2D( real_t* RESTRICT _data_cpEdge,
                                                                                        real_t* RESTRICT _data_cpVertex,
                                                                                        real_t* RESTRICT _data_invDiag_Edge,
                                                                                        real_t* RESTRICT _data_invDiag_Vertex,
                                                                                        real_t* RESTRICT _data_uxEdge,
                                                                                        real_t* RESTRICT _data_uxVertex,
                                                                                        real_t* RESTRICT _data_uyEdge,
                                                                                        real_t* RESTRICT _data_uyVertex,
                                                                                        real_t           macro_vertex_coord_id_0comp0,
                                                                                        real_t           macro_vertex_coord_id_0comp1,
                                                                                        real_t           macro_vertex_coord_id_1comp0,
                                                                                        real_t           macro_vertex_coord_id_1comp1,
                                                                                        real_t           macro_vertex_coord_id_2comp0,
                                                                                        real_t           macro_vertex_coord_id_2comp1,
                                                                                        int64_t          micro_edges_per_macro_edge,
                                                                                        real_t           micro_edges_per_macro_edge_float,
                                                                                        real_t           radRayVertex,
                                                                                        real_t           radRefVertex,
                                                                                        real_t           rayVertex_0,
                                                                                        real_t           rayVertex_1,
                                                                                        real_t           refVertex_0,
                                                                                        real_t           refVertex_1,
                                                                                        real_t           thrVertex_0,
                                                                                        real_t           thrVertex_1 ) const;

   // Cached inverse diagonal; allocated lazily by computeInverseDiagonalOperatorValues().
   std::shared_ptr< P2Function< real_t > > invDiag_;
   // Coefficient function cp of the weak form, copied from the constructor argument.
   P2Function< real_t > cp;
   // Velocity functions, copied from the constructor arguments
   // (presumably the x- and y-components of u — confirm against the form definition).
   P2Function< real_t > ux;
   P2Function< real_t > uy;
};

} // namespace operatorgeneration

} // namespace hyteg
+*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseAdvectionAnnulusMap.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseAdvectionAnnulusMap::apply_P2ElementwiseAdvectionAnnulusMap_macro_2D( real_t * RESTRICT _data_cpEdge, real_t * RESTRICT _data_cpVertex, real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const 
real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; + const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_8 = -tmp_qloop_7; + const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); + const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; + const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_11*1.0; + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t 
_data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const 
__m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d cp_dof_0 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d cp_dof_1 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d cp_dof_2 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d cp_dof_3 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d cp_dof_4 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d cp_dof_5 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const 
__m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 
= _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0); + const __m256d tmp_qloop_2 = 
_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0); + const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1); + const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3); + const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4); + const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5)); + const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5)); + const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)); + const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_19 = 
_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18); + const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24); + const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3); + const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30); + const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3); + const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3)); + const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_38 = 
_mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_39 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_37),tmp_qloop_38); + const __m256d tmp_qloop_40 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY))); + const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_43 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_44); + const __m256d tmp_qloop_46 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_47); + const __m256d 
tmp_qloop_49 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38); + const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_37); + const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_42),tmp_qloop_44),tmp_qloop_47); + const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,ux_dof_3),_mm256_mul_pd(tmp_qloop_45,ux_dof_1)),_mm256_mul_pd(tmp_qloop_48,ux_dof_2)),_mm256_mul_pd(tmp_qloop_49,ux_dof_4)),_mm256_mul_pd(tmp_qloop_50,ux_dof_5)),_mm256_mul_pd(tmp_qloop_51,ux_dof_0)); + const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,uy_dof_3),_mm256_mul_pd(tmp_qloop_45,uy_dof_1)),_mm256_mul_pd(tmp_qloop_48,uy_dof_2)),_mm256_mul_pd(tmp_qloop_49,uy_dof_4)),_mm256_mul_pd(tmp_qloop_50,uy_dof_5)),_mm256_mul_pd(tmp_qloop_51,uy_dof_0)); + const __m256d tmp_qloop_57 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_37); + const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)); + const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)); + const __m256d tmp_qloop_61 = 
_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_38); + const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)); + const __m256d tmp_qloop_63 = _mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)); + const __m256d tmp_qloop_65 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)); + const __m256d tmp_qloop_66 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)); + const __m256d tmp_qloop_67 = _mm256_add_pd(tmp_qloop_65,tmp_qloop_66); + const __m256d tmp_qloop_68 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)); + const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)); + const __m256d tmp_qloop_70 = _mm256_add_pd(tmp_qloop_68,tmp_qloop_69); + const __m256d tmp_qloop_72 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_73 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_72,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY))); + const __m256d tmp_qloop_74 = 
_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_72,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))); + const __m256d tmp_qloop_76 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_77 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_76,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY))); + const __m256d tmp_qloop_78 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_68,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_76,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))); + const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4)); + const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_21 = 
_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21); + const __m256d abs_det_jac_blending = tmp_qloop_21; + const __m256d tmp_qloop_55 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_dof_0,tmp_qloop_51),_mm256_mul_pd(cp_dof_1,tmp_qloop_45)),_mm256_mul_pd(cp_dof_2,tmp_qloop_48)),_mm256_mul_pd(cp_dof_3,tmp_qloop_42)),_mm256_mul_pd(cp_dof_4,tmp_qloop_49)),_mm256_mul_pd(cp_dof_5,tmp_qloop_50))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_55); + const __m256d tmp_qloop_80 = _mm256_mul_pd(tmp_qloop_45,tmp_qloop_55); + const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_55); + const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_55); + const __m256d tmp_qloop_83 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_55); + const __m256d tmp_qloop_84 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_55); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22); + const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22); + const __m256d tmp_qloop_54 = 
_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_41))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_41)))); + const __m256d tmp_qloop_60 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_58),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_59))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_58),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_59)))); + const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_62),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_63))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_62),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_63)))); + const __m256d tmp_qloop_71 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_67),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_70))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_67),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_70)))); + const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_73),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_74))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_73),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_74)))); + const __m256d tmp_qloop_79 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_77),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_78))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_77),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_78)))); + const __m256d hessian_blending_0_0_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28); + const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11))); + const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_0_1_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36); + const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d q_tmp_0_0 = 
_mm256_mul_pd(tmp_qloop_54,tmp_qloop_56); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_56,tmp_qloop_60); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_56,tmp_qloop_64); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_56,tmp_qloop_71); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_56,tmp_qloop_75); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_56,tmp_qloop_79); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_80); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_80); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_64,tmp_qloop_80); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_71,tmp_qloop_80); + const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_75,tmp_qloop_80); + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_79,tmp_qloop_80); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_81); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_81); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_64,tmp_qloop_81); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_71,tmp_qloop_81); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_75,tmp_qloop_81); + const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_79,tmp_qloop_81); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_82); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_82); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_64,tmp_qloop_82); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_71,tmp_qloop_82); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_75,tmp_qloop_82); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_79,tmp_qloop_82); + const __m256d q_tmp_4_0 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_83); + const __m256d q_tmp_4_1 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_83); + const __m256d q_tmp_4_2 = _mm256_mul_pd(tmp_qloop_64,tmp_qloop_83); + const __m256d q_tmp_4_3 = _mm256_mul_pd(tmp_qloop_71,tmp_qloop_83); + const __m256d q_tmp_4_4 = 
_mm256_mul_pd(tmp_qloop_75,tmp_qloop_83); + const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_79,tmp_qloop_83); + const __m256d q_tmp_5_0 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_84); + const __m256d q_tmp_5_1 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_84); + const __m256d q_tmp_5_2 = _mm256_mul_pd(tmp_qloop_64,tmp_qloop_84); + const __m256d q_tmp_5_3 = _mm256_mul_pd(tmp_qloop_71,tmp_qloop_84); + const __m256d q_tmp_5_4 = _mm256_mul_pd(tmp_qloop_75,tmp_qloop_84); + const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_79,tmp_qloop_84); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0); + q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1); + q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2); + q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3); + q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4); + q_acc_4_5 = 
_mm256_add_pd(q_acc_4_5,q_tmp_4_5); + q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0); + q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1); + q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2); + q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3); + q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4); + q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)); + const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5)); + const __m256d elMatVec_5 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5)); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / 
(2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + 
macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 
2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t cp_dof_1 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_3 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); 
+ const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0; + const real_t tmp_qloop_40 = jac_affine_inv_0_0_GRAY*tmp_qloop_39 + 
jac_affine_inv_1_0_GRAY*tmp_qloop_39; + const real_t tmp_qloop_41 = jac_affine_inv_0_1_GRAY*tmp_qloop_39 + jac_affine_inv_1_1_GRAY*tmp_qloop_39; + const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_44 = tmp_qloop_43*2.0; + const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q]; + const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_47 = tmp_qloop_46*2.0; + const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q]; + const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0; + const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0; + const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0; + const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0; + const real_t tmp_qloop_57 = tmp_qloop_37 - 1.0; + const real_t tmp_qloop_58 = jac_affine_inv_0_0_GRAY*tmp_qloop_57; + const real_t tmp_qloop_59 = jac_affine_inv_0_1_GRAY*tmp_qloop_57; + const real_t tmp_qloop_61 = tmp_qloop_38 - 1.0; + const real_t tmp_qloop_62 = jac_affine_inv_1_0_GRAY*tmp_qloop_61; + const real_t tmp_qloop_63 = jac_affine_inv_1_1_GRAY*tmp_qloop_61; + const real_t tmp_qloop_65 = jac_affine_inv_1_0_GRAY*tmp_qloop_37; + const real_t tmp_qloop_66 = jac_affine_inv_0_0_GRAY*tmp_qloop_38; + const real_t tmp_qloop_67 = tmp_qloop_65 + tmp_qloop_66; + const real_t tmp_qloop_68 = jac_affine_inv_1_1_GRAY*tmp_qloop_37; + const real_t tmp_qloop_69 = jac_affine_inv_0_1_GRAY*tmp_qloop_38; + const real_t tmp_qloop_70 = tmp_qloop_68 + tmp_qloop_69; + const real_t tmp_qloop_72 = -tmp_qloop_37 - 
8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_73 = jac_affine_inv_1_0_GRAY*tmp_qloop_72 - tmp_qloop_66; + const real_t tmp_qloop_74 = jac_affine_inv_1_1_GRAY*tmp_qloop_72 - tmp_qloop_69; + const real_t tmp_qloop_76 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_77 = jac_affine_inv_0_0_GRAY*tmp_qloop_76 - tmp_qloop_65; + const real_t tmp_qloop_78 = jac_affine_inv_0_1_GRAY*tmp_qloop_76 - tmp_qloop_68; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_55 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(cp_dof_0*tmp_qloop_51 + cp_dof_1*tmp_qloop_45 + cp_dof_2*tmp_qloop_48 + cp_dof_3*tmp_qloop_42 + cp_dof_4*tmp_qloop_49 + cp_dof_5*tmp_qloop_50)*_data_q_w[q]; + const real_t tmp_qloop_56 = tmp_qloop_51*tmp_qloop_55; + const real_t tmp_qloop_80 = tmp_qloop_45*tmp_qloop_55; + const real_t tmp_qloop_81 = tmp_qloop_48*tmp_qloop_55; + const real_t tmp_qloop_82 = tmp_qloop_42*tmp_qloop_55; + const real_t tmp_qloop_83 = tmp_qloop_49*tmp_qloop_55; + const real_t tmp_qloop_84 = tmp_qloop_50*tmp_qloop_55; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t tmp_qloop_54 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + 
tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41); + const real_t tmp_qloop_60 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_58 + jac_blending_inv_1_0*tmp_qloop_59) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_58 + jac_blending_inv_1_1*tmp_qloop_59); + const real_t tmp_qloop_64 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_62 + jac_blending_inv_1_0*tmp_qloop_63) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_62 + jac_blending_inv_1_1*tmp_qloop_63); + const real_t tmp_qloop_71 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_67 + jac_blending_inv_1_0*tmp_qloop_70) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_67 + jac_blending_inv_1_1*tmp_qloop_70); + const real_t tmp_qloop_75 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_73 + jac_blending_inv_1_0*tmp_qloop_74) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_73 + jac_blending_inv_1_1*tmp_qloop_74); + const real_t tmp_qloop_79 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_77 + jac_blending_inv_1_0*tmp_qloop_78) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_77 + jac_blending_inv_1_1*tmp_qloop_78); + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; 
+ const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t q_tmp_0_0 = tmp_qloop_54*tmp_qloop_56; + const real_t q_tmp_0_1 = tmp_qloop_56*tmp_qloop_60; + const real_t q_tmp_0_2 = tmp_qloop_56*tmp_qloop_64; + const real_t q_tmp_0_3 = tmp_qloop_56*tmp_qloop_71; + const real_t q_tmp_0_4 = tmp_qloop_56*tmp_qloop_75; + const real_t q_tmp_0_5 = tmp_qloop_56*tmp_qloop_79; + const real_t q_tmp_1_0 = tmp_qloop_54*tmp_qloop_80; + const real_t q_tmp_1_1 = tmp_qloop_60*tmp_qloop_80; + const real_t q_tmp_1_2 = tmp_qloop_64*tmp_qloop_80; + const real_t q_tmp_1_3 = tmp_qloop_71*tmp_qloop_80; + const real_t q_tmp_1_4 = tmp_qloop_75*tmp_qloop_80; + const real_t q_tmp_1_5 = tmp_qloop_79*tmp_qloop_80; + const real_t q_tmp_2_0 = tmp_qloop_54*tmp_qloop_81; + const real_t q_tmp_2_1 = tmp_qloop_60*tmp_qloop_81; + const real_t q_tmp_2_2 = tmp_qloop_64*tmp_qloop_81; + const real_t q_tmp_2_3 = tmp_qloop_71*tmp_qloop_81; + const real_t q_tmp_2_4 = tmp_qloop_75*tmp_qloop_81; + const real_t q_tmp_2_5 = tmp_qloop_79*tmp_qloop_81; + const real_t q_tmp_3_0 = tmp_qloop_54*tmp_qloop_82; + const real_t q_tmp_3_1 = tmp_qloop_60*tmp_qloop_82; + const real_t q_tmp_3_2 = tmp_qloop_64*tmp_qloop_82; + const real_t q_tmp_3_3 = tmp_qloop_71*tmp_qloop_82; + const real_t q_tmp_3_4 = tmp_qloop_75*tmp_qloop_82; + const real_t q_tmp_3_5 = tmp_qloop_79*tmp_qloop_82; + const real_t q_tmp_4_0 = tmp_qloop_54*tmp_qloop_83; + const real_t q_tmp_4_1 = tmp_qloop_60*tmp_qloop_83; + const real_t q_tmp_4_2 = tmp_qloop_64*tmp_qloop_83; + const real_t q_tmp_4_3 = tmp_qloop_71*tmp_qloop_83; + const real_t q_tmp_4_4 = tmp_qloop_75*tmp_qloop_83; + const real_t q_tmp_4_5 = tmp_qloop_79*tmp_qloop_83; + const real_t q_tmp_5_0 = tmp_qloop_54*tmp_qloop_84; + const real_t q_tmp_5_1 = tmp_qloop_60*tmp_qloop_84; + const real_t q_tmp_5_2 = tmp_qloop_64*tmp_qloop_84; + const 
real_t q_tmp_5_3 = tmp_qloop_71*tmp_qloop_84; + const real_t q_tmp_5_4 = tmp_qloop_75*tmp_qloop_84; + const real_t q_tmp_5_5 = tmp_qloop_79*tmp_qloop_84; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5; + const real_t elMatVec_3 = 
q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5; + const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5; + const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + 
} + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = 
jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d cp_dof_0 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d cp_dof_1 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d cp_dof_2 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d cp_dof_3 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d cp_dof_4 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d cp_dof_5 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const 
__m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d 
q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0); + const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0); + const __m256d tmp_qloop_3 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1); + const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3); + const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4); + const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5)); + const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5)); + const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)); + const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + 
const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18); + const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24); + const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3); + const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30); + const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3); + const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3)); + const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_38 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + 
const __m256d tmp_qloop_39 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_37),tmp_qloop_38); + const __m256d tmp_qloop_40 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE))); + const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_43 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_44); + const __m256d tmp_qloop_46 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_47); + const __m256d tmp_qloop_49 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38); + const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_37); + const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_42),tmp_qloop_44),tmp_qloop_47); + const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,ux_dof_3),_mm256_mul_pd(tmp_qloop_45,ux_dof_1)),_mm256_mul_pd(tmp_qloop_48,ux_dof_2)),_mm256_mul_pd(tmp_qloop_49,ux_dof_4)),_mm256_mul_pd(tmp_qloop_50,ux_dof_5)),_mm256_mul_pd(tmp_qloop_51,ux_dof_0)); + const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,uy_dof_3),_mm256_mul_pd(tmp_qloop_45,uy_dof_1)),_mm256_mul_pd(tmp_qloop_48,uy_dof_2)),_mm256_mul_pd(tmp_qloop_49,uy_dof_4)),_mm256_mul_pd(tmp_qloop_50,uy_dof_5)),_mm256_mul_pd(tmp_qloop_51,uy_dof_0)); + const __m256d tmp_qloop_57 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_37); + const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)); + const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_57,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)); + const __m256d tmp_qloop_61 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_38); + const 
__m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)); + const __m256d tmp_qloop_63 = _mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)); + const __m256d tmp_qloop_65 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)); + const __m256d tmp_qloop_66 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)); + const __m256d tmp_qloop_67 = _mm256_add_pd(tmp_qloop_65,tmp_qloop_66); + const __m256d tmp_qloop_68 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)); + const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)); + const __m256d tmp_qloop_70 = _mm256_add_pd(tmp_qloop_68,tmp_qloop_69); + const __m256d tmp_qloop_72 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_73 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_72,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE))); + const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_72,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))); + const __m256d tmp_qloop_76 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_77 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_76,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE))); + const __m256d tmp_qloop_78 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_68,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_76,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))); + const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4)); + const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21); + const __m256d abs_det_jac_blending = tmp_qloop_21; + const __m256d tmp_qloop_55 = 
_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_dof_0,tmp_qloop_51),_mm256_mul_pd(cp_dof_1,tmp_qloop_45)),_mm256_mul_pd(cp_dof_2,tmp_qloop_48)),_mm256_mul_pd(cp_dof_3,tmp_qloop_42)),_mm256_mul_pd(cp_dof_4,tmp_qloop_49)),_mm256_mul_pd(cp_dof_5,tmp_qloop_50))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_51,tmp_qloop_55); + const __m256d tmp_qloop_80 = _mm256_mul_pd(tmp_qloop_45,tmp_qloop_55); + const __m256d tmp_qloop_81 = _mm256_mul_pd(tmp_qloop_48,tmp_qloop_55); + const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_42,tmp_qloop_55); + const __m256d tmp_qloop_83 = _mm256_mul_pd(tmp_qloop_49,tmp_qloop_55); + const __m256d tmp_qloop_84 = _mm256_mul_pd(tmp_qloop_50,tmp_qloop_55); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22); + const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22); + const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_41))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_41)))); + const __m256d tmp_qloop_60 = 
_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_58),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_59))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_58),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_59)))); + const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_62),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_63))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_62),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_63)))); + const __m256d tmp_qloop_71 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_67),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_70))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_67),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_70)))); + const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_73),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_74))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_73),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_74)))); + const __m256d tmp_qloop_79 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_77),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_78))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_77),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_78)))); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28); + const __m256d hessian_blending_1_0_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11))); + const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_1_1_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36); + const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_56); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_56,tmp_qloop_60); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_56,tmp_qloop_64); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_56,tmp_qloop_71); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_56,tmp_qloop_75); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_56,tmp_qloop_79); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_80); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_80); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_64,tmp_qloop_80); + const 
__m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_71,tmp_qloop_80); + const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_75,tmp_qloop_80); + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_79,tmp_qloop_80); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_81); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_81); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_64,tmp_qloop_81); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_71,tmp_qloop_81); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_75,tmp_qloop_81); + const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_79,tmp_qloop_81); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_82); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_82); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_64,tmp_qloop_82); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_71,tmp_qloop_82); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_75,tmp_qloop_82); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_79,tmp_qloop_82); + const __m256d q_tmp_4_0 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_83); + const __m256d q_tmp_4_1 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_83); + const __m256d q_tmp_4_2 = _mm256_mul_pd(tmp_qloop_64,tmp_qloop_83); + const __m256d q_tmp_4_3 = _mm256_mul_pd(tmp_qloop_71,tmp_qloop_83); + const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_75,tmp_qloop_83); + const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_79,tmp_qloop_83); + const __m256d q_tmp_5_0 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_84); + const __m256d q_tmp_5_1 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_84); + const __m256d q_tmp_5_2 = _mm256_mul_pd(tmp_qloop_64,tmp_qloop_84); + const __m256d q_tmp_5_3 = _mm256_mul_pd(tmp_qloop_71,tmp_qloop_84); + const __m256d q_tmp_5_4 = _mm256_mul_pd(tmp_qloop_75,tmp_qloop_84); + const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_79,tmp_qloop_84); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = 
_mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0); + q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1); + q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2); + q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3); + q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4); + q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5); + q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0); + q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1); + q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2); + q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3); + q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4); + q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5); + } + const __m256d elMatVec_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)); + const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5)); + const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5)); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 
1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t 
_data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / 
(micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - 
((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_dof_1 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t cp_dof_3 = _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t 
uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = 
(tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0; + const real_t tmp_qloop_40 = jac_affine_inv_0_0_BLUE*tmp_qloop_39 + jac_affine_inv_1_0_BLUE*tmp_qloop_39; + const real_t tmp_qloop_41 = jac_affine_inv_0_1_BLUE*tmp_qloop_39 + jac_affine_inv_1_1_BLUE*tmp_qloop_39; + const real_t tmp_qloop_42 = 
tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_44 = tmp_qloop_43*2.0; + const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q]; + const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_47 = tmp_qloop_46*2.0; + const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q]; + const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0; + const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0; + const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0; + const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0; + const real_t tmp_qloop_57 = tmp_qloop_37 - 1.0; + const real_t tmp_qloop_58 = jac_affine_inv_0_0_BLUE*tmp_qloop_57; + const real_t tmp_qloop_59 = jac_affine_inv_0_1_BLUE*tmp_qloop_57; + const real_t tmp_qloop_61 = tmp_qloop_38 - 1.0; + const real_t tmp_qloop_62 = jac_affine_inv_1_0_BLUE*tmp_qloop_61; + const real_t tmp_qloop_63 = jac_affine_inv_1_1_BLUE*tmp_qloop_61; + const real_t tmp_qloop_65 = jac_affine_inv_1_0_BLUE*tmp_qloop_37; + const real_t tmp_qloop_66 = jac_affine_inv_0_0_BLUE*tmp_qloop_38; + const real_t tmp_qloop_67 = tmp_qloop_65 + tmp_qloop_66; + const real_t tmp_qloop_68 = jac_affine_inv_1_1_BLUE*tmp_qloop_37; + const real_t tmp_qloop_69 = jac_affine_inv_0_1_BLUE*tmp_qloop_38; + const real_t tmp_qloop_70 = tmp_qloop_68 + tmp_qloop_69; + const real_t tmp_qloop_72 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_73 = jac_affine_inv_1_0_BLUE*tmp_qloop_72 - tmp_qloop_66; + const real_t tmp_qloop_74 = jac_affine_inv_1_1_BLUE*tmp_qloop_72 - tmp_qloop_69; 
+ const real_t tmp_qloop_76 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_77 = jac_affine_inv_0_0_BLUE*tmp_qloop_76 - tmp_qloop_65; + const real_t tmp_qloop_78 = jac_affine_inv_0_1_BLUE*tmp_qloop_76 - tmp_qloop_68; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_55 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(cp_dof_0*tmp_qloop_51 + cp_dof_1*tmp_qloop_45 + cp_dof_2*tmp_qloop_48 + cp_dof_3*tmp_qloop_42 + cp_dof_4*tmp_qloop_49 + cp_dof_5*tmp_qloop_50)*_data_q_w[q]; + const real_t tmp_qloop_56 = tmp_qloop_51*tmp_qloop_55; + const real_t tmp_qloop_80 = tmp_qloop_45*tmp_qloop_55; + const real_t tmp_qloop_81 = tmp_qloop_48*tmp_qloop_55; + const real_t tmp_qloop_82 = tmp_qloop_42*tmp_qloop_55; + const real_t tmp_qloop_83 = tmp_qloop_49*tmp_qloop_55; + const real_t tmp_qloop_84 = tmp_qloop_50*tmp_qloop_55; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t tmp_qloop_54 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41); + const real_t tmp_qloop_60 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_58 + 
jac_blending_inv_1_0*tmp_qloop_59) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_58 + jac_blending_inv_1_1*tmp_qloop_59); + const real_t tmp_qloop_64 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_62 + jac_blending_inv_1_0*tmp_qloop_63) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_62 + jac_blending_inv_1_1*tmp_qloop_63); + const real_t tmp_qloop_71 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_67 + jac_blending_inv_1_0*tmp_qloop_70) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_67 + jac_blending_inv_1_1*tmp_qloop_70); + const real_t tmp_qloop_75 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_73 + jac_blending_inv_1_0*tmp_qloop_74) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_73 + jac_blending_inv_1_1*tmp_qloop_74); + const real_t tmp_qloop_79 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_77 + jac_blending_inv_1_0*tmp_qloop_78) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_77 + jac_blending_inv_1_1*tmp_qloop_78); + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + 
tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t q_tmp_0_0 = tmp_qloop_54*tmp_qloop_56; + const real_t q_tmp_0_1 = tmp_qloop_56*tmp_qloop_60; + const real_t q_tmp_0_2 = tmp_qloop_56*tmp_qloop_64; + const real_t q_tmp_0_3 = tmp_qloop_56*tmp_qloop_71; + const real_t q_tmp_0_4 = tmp_qloop_56*tmp_qloop_75; + const real_t q_tmp_0_5 = tmp_qloop_56*tmp_qloop_79; + const real_t q_tmp_1_0 = tmp_qloop_54*tmp_qloop_80; + const real_t q_tmp_1_1 = tmp_qloop_60*tmp_qloop_80; + const real_t q_tmp_1_2 = tmp_qloop_64*tmp_qloop_80; + const real_t q_tmp_1_3 = tmp_qloop_71*tmp_qloop_80; + const real_t q_tmp_1_4 = tmp_qloop_75*tmp_qloop_80; + const real_t q_tmp_1_5 = tmp_qloop_79*tmp_qloop_80; + const real_t q_tmp_2_0 = tmp_qloop_54*tmp_qloop_81; + const real_t q_tmp_2_1 = tmp_qloop_60*tmp_qloop_81; + const real_t q_tmp_2_2 = tmp_qloop_64*tmp_qloop_81; + const real_t q_tmp_2_3 = tmp_qloop_71*tmp_qloop_81; + const real_t q_tmp_2_4 = tmp_qloop_75*tmp_qloop_81; + const real_t q_tmp_2_5 = tmp_qloop_79*tmp_qloop_81; + const real_t q_tmp_3_0 = tmp_qloop_54*tmp_qloop_82; + const real_t q_tmp_3_1 = tmp_qloop_60*tmp_qloop_82; + const real_t q_tmp_3_2 = tmp_qloop_64*tmp_qloop_82; + const real_t q_tmp_3_3 = tmp_qloop_71*tmp_qloop_82; + const real_t q_tmp_3_4 = tmp_qloop_75*tmp_qloop_82; + const real_t q_tmp_3_5 = tmp_qloop_79*tmp_qloop_82; + const real_t q_tmp_4_0 = tmp_qloop_54*tmp_qloop_83; + const real_t q_tmp_4_1 = tmp_qloop_60*tmp_qloop_83; + const real_t q_tmp_4_2 = tmp_qloop_64*tmp_qloop_83; + const real_t q_tmp_4_3 = tmp_qloop_71*tmp_qloop_83; + const real_t q_tmp_4_4 = tmp_qloop_75*tmp_qloop_83; + const real_t q_tmp_4_5 = tmp_qloop_79*tmp_qloop_83; + const real_t q_tmp_5_0 = tmp_qloop_54*tmp_qloop_84; + const real_t q_tmp_5_1 = tmp_qloop_60*tmp_qloop_84; + const real_t q_tmp_5_2 = tmp_qloop_64*tmp_qloop_84; + const real_t q_tmp_5_3 = tmp_qloop_71*tmp_qloop_84; + const real_t q_tmp_5_4 = tmp_qloop_75*tmp_qloop_84; + const 
real_t q_tmp_5_5 = tmp_qloop_79*tmp_qloop_84; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + 
q_acc_3_5*src_dof_5; + const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5; + const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + } + } + } + } +} +} // namespace 
operatorgeneration + +} // namespace hyteg diff --git a/operators/advection/avx/P2ElementwiseAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp b/operators/advection/avx/P2ElementwiseAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cbfd3a92a612ab010deca9a0232f1d27fe0989e1 --- /dev/null +++ b/operators/advection/avx/P2ElementwiseAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp @@ -0,0 +1,761 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. 
+*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseAdvectionAnnulusMap.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseAdvectionAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseAdvectionAnnulusMap_macro_2D( real_t * RESTRICT _data_cpEdge, real_t * RESTRICT _data_cpVertex, real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = 
macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; + const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_8 = -tmp_qloop_7; + const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); + const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; + const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_11*1.0; + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + 
_data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d cp_dof_0 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d cp_dof_1 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d cp_dof_2 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const 
__m256d cp_dof_3 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d cp_dof_4 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d cp_dof_5 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d 
uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0); + const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0); + const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1); + const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3); + const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4); + const 
__m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5)); + const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5)); + const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)); + const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18); + const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24); + const __m256d tmp_qloop_26 = 
_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3); + const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30); + const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3); + const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3)); + const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_39 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_41 = 
_mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_43 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_38),tmp_qloop_40),tmp_qloop_42); + const __m256d tmp_qloop_44 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_37),tmp_qloop_44); + const __m256d tmp_qloop_46 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY))); + const __m256d tmp_qloop_47 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))); + const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_40); + const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_42); + const __m256d 
tmp_qloop_50 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_37); + const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,ux_dof_3),_mm256_mul_pd(tmp_qloop_43,ux_dof_0)),_mm256_mul_pd(tmp_qloop_48,ux_dof_1)),_mm256_mul_pd(tmp_qloop_49,ux_dof_2)),_mm256_mul_pd(tmp_qloop_50,ux_dof_4)),_mm256_mul_pd(tmp_qloop_51,ux_dof_5)); + const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,uy_dof_3),_mm256_mul_pd(tmp_qloop_43,uy_dof_0)),_mm256_mul_pd(tmp_qloop_48,uy_dof_1)),_mm256_mul_pd(tmp_qloop_49,uy_dof_2)),_mm256_mul_pd(tmp_qloop_50,uy_dof_4)),_mm256_mul_pd(tmp_qloop_51,uy_dof_5)); + const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_37); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)); + const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)); + const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_44); + const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_58,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)); + const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_58,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)); + const __m256d tmp_qloop_61 = 
_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)); + const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)); + const __m256d tmp_qloop_63 = _mm256_add_pd(tmp_qloop_61,tmp_qloop_62); + const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)); + const __m256d tmp_qloop_65 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)); + const __m256d tmp_qloop_66 = _mm256_add_pd(tmp_qloop_64,tmp_qloop_65); + const __m256d tmp_qloop_67 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_68 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_67,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY))); + const __m256d tmp_qloop_69 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_67,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))); + const __m256d tmp_qloop_70 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_71 = 
_mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY))); + const __m256d tmp_qloop_72 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_64,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))); + const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4)); + const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21); + const __m256d abs_det_jac_blending = tmp_qloop_21; + const __m256d tmp_qloop_54 = 
_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_dof_0,tmp_qloop_43),_mm256_mul_pd(cp_dof_1,tmp_qloop_48)),_mm256_mul_pd(cp_dof_2,tmp_qloop_49)),_mm256_mul_pd(cp_dof_3,tmp_qloop_38)),_mm256_mul_pd(cp_dof_4,tmp_qloop_50)),_mm256_mul_pd(cp_dof_5,tmp_qloop_51))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22); + const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28); + const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11))); + const __m256d hessian_blending_1_0_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36); + const __m256d hessian_blending_1_1_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d q_tmp_0_0 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,tmp_qloop_54),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_46),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_47))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_46),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_47))))); + const __m256d q_tmp_1_1 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_48,tmp_qloop_54),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_57))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_57))))); + const __m256d q_tmp_2_2 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_49,tmp_qloop_54),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_60))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_59),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_60))))); + const __m256d q_tmp_3_3 = 
_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_38,tmp_qloop_54),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_66))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_66))))); + const __m256d q_tmp_4_4 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_50,tmp_qloop_54),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_69))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_69))))); + const __m256d q_tmp_5_5 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_51,tmp_qloop_54),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_71),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_71),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_72))))); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4); + q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5); + } + const __m256d elMatDiag_0 = q_acc_0_0; + const __m256d elMatDiag_1 = q_acc_1_1; + const __m256d elMatDiag_2 = q_acc_2_2; + const __m256d elMatDiag_3 = q_acc_3_3; + const __m256d elMatDiag_4 = q_acc_4_4; + const __m256d elMatDiag_5 = q_acc_5_5; + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) 
ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / 
(micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t cp_dof_1 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_3 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t 
ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + 
tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_39 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_40 = tmp_qloop_39*2.0; + const real_t tmp_qloop_41 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_42 = tmp_qloop_41*2.0; + const real_t tmp_qloop_43 = tmp_qloop_38 + tmp_qloop_40 + tmp_qloop_42 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_44 = 
4.0*_data_q_p_1[q]; + const real_t tmp_qloop_45 = tmp_qloop_37 + tmp_qloop_44 - 3.0; + const real_t tmp_qloop_46 = jac_affine_inv_0_0_GRAY*tmp_qloop_45 + jac_affine_inv_1_0_GRAY*tmp_qloop_45; + const real_t tmp_qloop_47 = jac_affine_inv_0_1_GRAY*tmp_qloop_45 + jac_affine_inv_1_1_GRAY*tmp_qloop_45; + const real_t tmp_qloop_48 = tmp_qloop_40 - _data_q_p_0[q]; + const real_t tmp_qloop_49 = tmp_qloop_42 - _data_q_p_1[q]; + const real_t tmp_qloop_50 = -tmp_qloop_38 + tmp_qloop_41*-4.0 + tmp_qloop_44; + const real_t tmp_qloop_51 = tmp_qloop_37 - tmp_qloop_38 + tmp_qloop_39*-4.0; + const real_t tmp_qloop_52 = tmp_qloop_38*ux_dof_3 + tmp_qloop_43*ux_dof_0 + tmp_qloop_48*ux_dof_1 + tmp_qloop_49*ux_dof_2 + tmp_qloop_50*ux_dof_4 + tmp_qloop_51*ux_dof_5; + const real_t tmp_qloop_53 = tmp_qloop_38*uy_dof_3 + tmp_qloop_43*uy_dof_0 + tmp_qloop_48*uy_dof_1 + tmp_qloop_49*uy_dof_2 + tmp_qloop_50*uy_dof_4 + tmp_qloop_51*uy_dof_5; + const real_t tmp_qloop_55 = tmp_qloop_37 - 1.0; + const real_t tmp_qloop_56 = jac_affine_inv_0_0_GRAY*tmp_qloop_55; + const real_t tmp_qloop_57 = jac_affine_inv_0_1_GRAY*tmp_qloop_55; + const real_t tmp_qloop_58 = tmp_qloop_44 - 1.0; + const real_t tmp_qloop_59 = jac_affine_inv_1_0_GRAY*tmp_qloop_58; + const real_t tmp_qloop_60 = jac_affine_inv_1_1_GRAY*tmp_qloop_58; + const real_t tmp_qloop_61 = jac_affine_inv_1_0_GRAY*tmp_qloop_37; + const real_t tmp_qloop_62 = jac_affine_inv_0_0_GRAY*tmp_qloop_44; + const real_t tmp_qloop_63 = tmp_qloop_61 + tmp_qloop_62; + const real_t tmp_qloop_64 = jac_affine_inv_1_1_GRAY*tmp_qloop_37; + const real_t tmp_qloop_65 = jac_affine_inv_0_1_GRAY*tmp_qloop_44; + const real_t tmp_qloop_66 = tmp_qloop_64 + tmp_qloop_65; + const real_t tmp_qloop_67 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_68 = jac_affine_inv_1_0_GRAY*tmp_qloop_67 - tmp_qloop_62; + const real_t tmp_qloop_69 = jac_affine_inv_1_1_GRAY*tmp_qloop_67 - tmp_qloop_65; + const real_t tmp_qloop_70 = -tmp_qloop_44 - 8.0*_data_q_p_0[q] + 4.0; + 
const real_t tmp_qloop_71 = jac_affine_inv_0_0_GRAY*tmp_qloop_70 - tmp_qloop_61; + const real_t tmp_qloop_72 = jac_affine_inv_0_1_GRAY*tmp_qloop_70 - tmp_qloop_64; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_54 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(cp_dof_0*tmp_qloop_43 + cp_dof_1*tmp_qloop_48 + cp_dof_2*tmp_qloop_49 + cp_dof_3*tmp_qloop_38 + cp_dof_4*tmp_qloop_50 + cp_dof_5*tmp_qloop_51)*_data_q_w[q]; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - 
tmp_qloop_35 + tmp_qloop_36; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t q_tmp_0_0 = tmp_qloop_43*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_46 + jac_blending_inv_1_0*tmp_qloop_47) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_46 + jac_blending_inv_1_1*tmp_qloop_47)); + const real_t q_tmp_1_1 = tmp_qloop_48*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_1_0*tmp_qloop_57) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57)); + const real_t q_tmp_2_2 = tmp_qloop_49*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_59 + jac_blending_inv_1_0*tmp_qloop_60) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_59 + jac_blending_inv_1_1*tmp_qloop_60)); + const real_t q_tmp_3_3 = tmp_qloop_38*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_66) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_66)); + const real_t q_tmp_4_4 = tmp_qloop_50*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_1_0*tmp_qloop_69) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69)); + const real_t q_tmp_5_5 = tmp_qloop_51*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_71 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_71 + jac_blending_inv_1_1*tmp_qloop_72)); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t 
elMatDiag_0 = q_acc_0_0; + const real_t elMatDiag_1 = q_acc_1_1; + const real_t elMatDiag_2 = q_acc_2_2; + const real_t elMatDiag_3 = q_acc_3_3; + const real_t elMatDiag_4 = q_acc_4_4; + const real_t elMatDiag_5 = q_acc_5_5; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + } + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = 
macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + { + /* FaceType.BLUE */ + 
for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d cp_dof_0 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d cp_dof_1 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d cp_dof_2 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d cp_dof_3 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d cp_dof_4 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d cp_dof_5 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const 
__m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0); + const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0); + const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1); + const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3); + const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4); + const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5)); + const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_14 = 
_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5)); + const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)); + const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18); + const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24); + const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4); + const __m256d tmp_qloop_28 = 
_mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3); + const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30); + const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3); + const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3)); + const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_39 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_41 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_43 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_38),tmp_qloop_40),tmp_qloop_42); + const __m256d tmp_qloop_44 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_37),tmp_qloop_44); + const __m256d tmp_qloop_46 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE))); + const __m256d tmp_qloop_47 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_45,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))); + const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_40); + const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_42); + const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_44); + const __m256d tmp_qloop_51 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_37); + const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,ux_dof_3),_mm256_mul_pd(tmp_qloop_43,ux_dof_0)),_mm256_mul_pd(tmp_qloop_48,ux_dof_1)),_mm256_mul_pd(tmp_qloop_49,ux_dof_2)),_mm256_mul_pd(tmp_qloop_50,ux_dof_4)),_mm256_mul_pd(tmp_qloop_51,ux_dof_5)); + const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,uy_dof_3),_mm256_mul_pd(tmp_qloop_43,uy_dof_0)),_mm256_mul_pd(tmp_qloop_48,uy_dof_1)),_mm256_mul_pd(tmp_qloop_49,uy_dof_2)),_mm256_mul_pd(tmp_qloop_50,uy_dof_4)),_mm256_mul_pd(tmp_qloop_51,uy_dof_5)); + const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_37); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)); + const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)); + const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_44); + const __m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_58,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)); + const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_58,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)); + const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)); + const __m256d tmp_qloop_62 = 
_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)); + const __m256d tmp_qloop_63 = _mm256_add_pd(tmp_qloop_61,tmp_qloop_62); + const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)); + const __m256d tmp_qloop_65 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)); + const __m256d tmp_qloop_66 = _mm256_add_pd(tmp_qloop_64,tmp_qloop_65); + const __m256d tmp_qloop_67 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_68 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_67,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE))); + const __m256d tmp_qloop_69 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_67,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))); + const __m256d tmp_qloop_70 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_71 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE))); + const __m256d tmp_qloop_72 = 
_mm256_add_pd(_mm256_mul_pd(tmp_qloop_64,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))); + const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4)); + const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21); + const __m256d abs_det_jac_blending = tmp_qloop_21; + const __m256d tmp_qloop_54 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_dof_0,tmp_qloop_43),_mm256_mul_pd(cp_dof_1,tmp_qloop_48)),_mm256_mul_pd(cp_dof_2,tmp_qloop_49)),_mm256_mul_pd(cp_dof_3,tmp_qloop_38)),_mm256_mul_pd(cp_dof_4,tmp_qloop_50)),_mm256_mul_pd(cp_dof_5,tmp_qloop_51))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)); + const __m256d 
jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22); + const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28); + const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11))); + const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_0_1_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36); + const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d q_tmp_0_0 = 
_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_43,tmp_qloop_54),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_46),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_47))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_46),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_47))))); + const __m256d q_tmp_1_1 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_48,tmp_qloop_54),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_57))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_57))))); + const __m256d q_tmp_2_2 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_49,tmp_qloop_54),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_60))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_59),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_60))))); + const __m256d q_tmp_3_3 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_38,tmp_qloop_54),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_66))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_66))))); + const __m256d q_tmp_4_4 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_50,tmp_qloop_54),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_69))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_69))))); + const __m256d q_tmp_5_5 = 
_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_51,tmp_qloop_54),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_71),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_71),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_72))))); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4); + q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5); + } + const __m256d elMatDiag_0 = q_acc_0_0; + const __m256d elMatDiag_1 = q_acc_1_1; + const __m256d elMatDiag_2 = q_acc_2_2; + const __m256d elMatDiag_3 = q_acc_3_3; + const __m256d elMatDiag_4 = q_acc_4_4; + const __m256d elMatDiag_5 = q_acc_5_5; + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + (ctr_1 + 
1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + 
macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / 
(micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_dof_1 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t cp_dof_3 = _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t 
uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const 
real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_39 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_40 = tmp_qloop_39*2.0; + const real_t tmp_qloop_41 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_42 = tmp_qloop_41*2.0; + const real_t tmp_qloop_43 = tmp_qloop_38 + tmp_qloop_40 + tmp_qloop_42 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_44 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_45 = tmp_qloop_37 + tmp_qloop_44 - 3.0; + const real_t tmp_qloop_46 = jac_affine_inv_0_0_BLUE*tmp_qloop_45 + jac_affine_inv_1_0_BLUE*tmp_qloop_45; + const real_t tmp_qloop_47 = jac_affine_inv_0_1_BLUE*tmp_qloop_45 + jac_affine_inv_1_1_BLUE*tmp_qloop_45; + const real_t tmp_qloop_48 = tmp_qloop_40 - _data_q_p_0[q]; + const real_t tmp_qloop_49 = tmp_qloop_42 - _data_q_p_1[q]; + const real_t tmp_qloop_50 = -tmp_qloop_38 + tmp_qloop_41*-4.0 + 
tmp_qloop_44; + const real_t tmp_qloop_51 = tmp_qloop_37 - tmp_qloop_38 + tmp_qloop_39*-4.0; + const real_t tmp_qloop_52 = tmp_qloop_38*ux_dof_3 + tmp_qloop_43*ux_dof_0 + tmp_qloop_48*ux_dof_1 + tmp_qloop_49*ux_dof_2 + tmp_qloop_50*ux_dof_4 + tmp_qloop_51*ux_dof_5; + const real_t tmp_qloop_53 = tmp_qloop_38*uy_dof_3 + tmp_qloop_43*uy_dof_0 + tmp_qloop_48*uy_dof_1 + tmp_qloop_49*uy_dof_2 + tmp_qloop_50*uy_dof_4 + tmp_qloop_51*uy_dof_5; + const real_t tmp_qloop_55 = tmp_qloop_37 - 1.0; + const real_t tmp_qloop_56 = jac_affine_inv_0_0_BLUE*tmp_qloop_55; + const real_t tmp_qloop_57 = jac_affine_inv_0_1_BLUE*tmp_qloop_55; + const real_t tmp_qloop_58 = tmp_qloop_44 - 1.0; + const real_t tmp_qloop_59 = jac_affine_inv_1_0_BLUE*tmp_qloop_58; + const real_t tmp_qloop_60 = jac_affine_inv_1_1_BLUE*tmp_qloop_58; + const real_t tmp_qloop_61 = jac_affine_inv_1_0_BLUE*tmp_qloop_37; + const real_t tmp_qloop_62 = jac_affine_inv_0_0_BLUE*tmp_qloop_44; + const real_t tmp_qloop_63 = tmp_qloop_61 + tmp_qloop_62; + const real_t tmp_qloop_64 = jac_affine_inv_1_1_BLUE*tmp_qloop_37; + const real_t tmp_qloop_65 = jac_affine_inv_0_1_BLUE*tmp_qloop_44; + const real_t tmp_qloop_66 = tmp_qloop_64 + tmp_qloop_65; + const real_t tmp_qloop_67 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_68 = jac_affine_inv_1_0_BLUE*tmp_qloop_67 - tmp_qloop_62; + const real_t tmp_qloop_69 = jac_affine_inv_1_1_BLUE*tmp_qloop_67 - tmp_qloop_65; + const real_t tmp_qloop_70 = -tmp_qloop_44 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_71 = jac_affine_inv_0_0_BLUE*tmp_qloop_70 - tmp_qloop_61; + const real_t tmp_qloop_72 = jac_affine_inv_0_1_BLUE*tmp_qloop_70 - tmp_qloop_64; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = 
tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_54 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(cp_dof_0*tmp_qloop_43 + cp_dof_1*tmp_qloop_48 + cp_dof_2*tmp_qloop_49 + cp_dof_3*tmp_qloop_38 + cp_dof_4*tmp_qloop_50 + cp_dof_5*tmp_qloop_51)*_data_q_w[q]; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t q_tmp_0_0 = tmp_qloop_43*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_46 + 
jac_blending_inv_1_0*tmp_qloop_47) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_46 + jac_blending_inv_1_1*tmp_qloop_47)); + const real_t q_tmp_1_1 = tmp_qloop_48*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_1_0*tmp_qloop_57) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57)); + const real_t q_tmp_2_2 = tmp_qloop_49*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_59 + jac_blending_inv_1_0*tmp_qloop_60) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_59 + jac_blending_inv_1_1*tmp_qloop_60)); + const real_t q_tmp_3_3 = tmp_qloop_38*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_66) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_66)); + const real_t q_tmp_4_4 = tmp_qloop_50*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_1_0*tmp_qloop_69) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69)); + const real_t q_tmp_5_5 = tmp_qloop_51*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_71 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_71 + jac_blending_inv_1_1*tmp_qloop_72)); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatDiag_0 = q_acc_0_0; + const real_t elMatDiag_1 = q_acc_1_1; + const real_t elMatDiag_2 = q_acc_2_2; + const real_t elMatDiag_3 = q_acc_3_3; + const real_t elMatDiag_4 = q_acc_4_4; + const real_t elMatDiag_5 = q_acc_5_5; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 
1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + } + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/advection/avx/P2ElementwiseAdvection_apply_P2ElementwiseAdvection_macro_2D.cpp b/operators/advection/avx/P2ElementwiseAdvection_apply_P2ElementwiseAdvection_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6e8fc0c2a4da287a0792bfbc8bf673818d9a9c24 --- /dev/null +++ b/operators/advection/avx/P2ElementwiseAdvection_apply_P2ElementwiseAdvection_macro_2D.cpp @@ -0,0 +1,948 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. 
+* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseAdvection.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseAdvection::apply_P2ElementwiseAdvection_macro_2D( real_t * RESTRICT _data_cpEdge, real_t * RESTRICT _data_cpVertex, real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 
0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + { + /* FaceType.GRAY */ + for (int64_t 
ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d cp_dof_0 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 
+ 1)) / (2))]); + const __m256d cp_dof_1 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d cp_dof_2 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d cp_dof_3 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d cp_dof_4 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d cp_dof_5 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / 
(2))]); + const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + 
__m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_1 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_0),tmp_qloop_1); + const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_5); + const __m256d tmp_qloop_7 = 
_mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_8); + const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_1); + const __m256d tmp_qloop_11 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_0); + const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_3),tmp_qloop_5),tmp_qloop_8); + const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,ux_dof_4),_mm256_mul_pd(tmp_qloop_11,ux_dof_5)),_mm256_mul_pd(tmp_qloop_12,ux_dof_0)),_mm256_mul_pd(tmp_qloop_3,ux_dof_3)),_mm256_mul_pd(tmp_qloop_6,ux_dof_1)),_mm256_mul_pd(tmp_qloop_9,ux_dof_2)); + const __m256d tmp_qloop_14 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,uy_dof_4),_mm256_mul_pd(tmp_qloop_11,uy_dof_5)),_mm256_mul_pd(tmp_qloop_12,uy_dof_0)),_mm256_mul_pd(tmp_qloop_3,uy_dof_3)),_mm256_mul_pd(tmp_qloop_6,uy_dof_1)),_mm256_mul_pd(tmp_qloop_9,uy_dof_2)); + const __m256d tmp_qloop_15 = 
_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))))); + const __m256d tmp_qloop_16 = _mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_dof_0,tmp_qloop_12),_mm256_mul_pd(cp_dof_1,tmp_qloop_6)),_mm256_mul_pd(cp_dof_2,tmp_qloop_9)),_mm256_mul_pd(cp_dof_3,tmp_qloop_3)),_mm256_mul_pd(cp_dof_4,tmp_qloop_10)),_mm256_mul_pd(cp_dof_5,tmp_qloop_11)),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)); + const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_0); + const __m256d tmp_qloop_19 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_18),_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_18),_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))); + const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_1); + const __m256d tmp_qloop_21 = 
_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_20),_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_20),_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))); + const __m256d tmp_qloop_22 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)); + const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)); + const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)); + const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)); + const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(tmp_qloop_22,tmp_qloop_23)),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(tmp_qloop_24,tmp_qloop_25))); + const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_28 = 
_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))))); + const __m256d tmp_qloop_29 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_30 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))))); + const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_6); + const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_9); + const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_3); + const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_16); + const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_16); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_17); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_19); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_21); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_26); + const __m256d q_tmp_0_4 = 
_mm256_mul_pd(tmp_qloop_17,tmp_qloop_28); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_30); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_31); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_31); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_31); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_31); + const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_31); + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_31); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_32); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_32); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_32); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_32); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_32); + const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_32); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_33); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_33); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_33); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_33); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_33); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_33); + const __m256d q_tmp_4_0 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_34); + const __m256d q_tmp_4_1 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_34); + const __m256d q_tmp_4_2 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_34); + const __m256d q_tmp_4_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_34); + const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_34); + const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_34); + const __m256d q_tmp_5_0 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_35); + const __m256d q_tmp_5_1 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_35); + const __m256d q_tmp_5_2 = 
_mm256_mul_pd(tmp_qloop_21,tmp_qloop_35); + const __m256d q_tmp_5_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_35); + const __m256d q_tmp_5_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_35); + const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_35); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0); + q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1); + q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2); + q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3); + q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4); + q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5); + q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0); + q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1); + q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2); + q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3); + q_acc_5_4 = 
_mm256_add_pd(q_acc_5_4,q_tmp_5_4); + q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)); + const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5)); + const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5)); + 
_mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const 
int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + 
const real_t cp_dof_1 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_3 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / 
(2))]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0; + const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q]; + const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_8 = tmp_qloop_7*2.0; + const real_t 
tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0; + const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0; + const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2; + const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2; + const real_t tmp_qloop_15 = tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_0_GRAY*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_2); + const real_t tmp_qloop_16 = abs_det_jac_affine_GRAY*(cp_dof_0*tmp_qloop_12 + cp_dof_1*tmp_qloop_6 + cp_dof_2*tmp_qloop_9 + cp_dof_3*tmp_qloop_3 + cp_dof_4*tmp_qloop_10 + cp_dof_5*tmp_qloop_11)*_data_q_w[q]; + const real_t tmp_qloop_17 = tmp_qloop_12*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_0 - 1.0; + const real_t tmp_qloop_19 = jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_18 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_18; + const real_t tmp_qloop_20 = tmp_qloop_1 - 1.0; + const real_t tmp_qloop_21 = jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_20 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_20; + const real_t tmp_qloop_22 = jac_affine_inv_1_0_GRAY*tmp_qloop_0; + const real_t tmp_qloop_23 = jac_affine_inv_0_0_GRAY*tmp_qloop_1; + const real_t tmp_qloop_24 = jac_affine_inv_1_1_GRAY*tmp_qloop_0; + const real_t tmp_qloop_25 = jac_affine_inv_0_1_GRAY*tmp_qloop_1; + const real_t tmp_qloop_26 = tmp_qloop_13*(tmp_qloop_22 + tmp_qloop_23) + tmp_qloop_14*(tmp_qloop_24 + tmp_qloop_25); + const real_t tmp_qloop_27 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_28 = 
tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_27 - tmp_qloop_23) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_27 - tmp_qloop_25); + const real_t tmp_qloop_29 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_30 = tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_29 - tmp_qloop_22) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_29 - tmp_qloop_24); + const real_t tmp_qloop_31 = tmp_qloop_16*tmp_qloop_6; + const real_t tmp_qloop_32 = tmp_qloop_16*tmp_qloop_9; + const real_t tmp_qloop_33 = tmp_qloop_16*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_10*tmp_qloop_16; + const real_t tmp_qloop_35 = tmp_qloop_11*tmp_qloop_16; + const real_t q_tmp_0_0 = tmp_qloop_15*tmp_qloop_17; + const real_t q_tmp_0_1 = tmp_qloop_17*tmp_qloop_19; + const real_t q_tmp_0_2 = tmp_qloop_17*tmp_qloop_21; + const real_t q_tmp_0_3 = tmp_qloop_17*tmp_qloop_26; + const real_t q_tmp_0_4 = tmp_qloop_17*tmp_qloop_28; + const real_t q_tmp_0_5 = tmp_qloop_17*tmp_qloop_30; + const real_t q_tmp_1_0 = tmp_qloop_15*tmp_qloop_31; + const real_t q_tmp_1_1 = tmp_qloop_19*tmp_qloop_31; + const real_t q_tmp_1_2 = tmp_qloop_21*tmp_qloop_31; + const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_31; + const real_t q_tmp_1_4 = tmp_qloop_28*tmp_qloop_31; + const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_31; + const real_t q_tmp_2_0 = tmp_qloop_15*tmp_qloop_32; + const real_t q_tmp_2_1 = tmp_qloop_19*tmp_qloop_32; + const real_t q_tmp_2_2 = tmp_qloop_21*tmp_qloop_32; + const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_32; + const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_32; + const real_t q_tmp_2_5 = tmp_qloop_30*tmp_qloop_32; + const real_t q_tmp_3_0 = tmp_qloop_15*tmp_qloop_33; + const real_t q_tmp_3_1 = tmp_qloop_19*tmp_qloop_33; + const real_t q_tmp_3_2 = tmp_qloop_21*tmp_qloop_33; + const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_33; + const real_t q_tmp_3_4 = tmp_qloop_28*tmp_qloop_33; + const real_t q_tmp_3_5 = tmp_qloop_30*tmp_qloop_33; + const real_t q_tmp_4_0 = 
tmp_qloop_15*tmp_qloop_34; + const real_t q_tmp_4_1 = tmp_qloop_19*tmp_qloop_34; + const real_t q_tmp_4_2 = tmp_qloop_21*tmp_qloop_34; + const real_t q_tmp_4_3 = tmp_qloop_26*tmp_qloop_34; + const real_t q_tmp_4_4 = tmp_qloop_28*tmp_qloop_34; + const real_t q_tmp_4_5 = tmp_qloop_30*tmp_qloop_34; + const real_t q_tmp_5_0 = tmp_qloop_15*tmp_qloop_35; + const real_t q_tmp_5_1 = tmp_qloop_19*tmp_qloop_35; + const real_t q_tmp_5_2 = tmp_qloop_21*tmp_qloop_35; + const real_t q_tmp_5_3 = tmp_qloop_26*tmp_qloop_35; + const real_t q_tmp_5_4 = tmp_qloop_28*tmp_qloop_35; + const real_t q_tmp_5_5 = tmp_qloop_30*tmp_qloop_35; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatVec_0 = 
q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5; + const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5; + const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - 
((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + } + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + 
const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d cp_dof_0 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d cp_dof_1 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d cp_dof_2 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d cp_dof_3 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d cp_dof_4 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d cp_dof_5 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const 
__m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d 
q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_1 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_0),tmp_qloop_1); + const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_5 
= _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_5); + const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_8); + const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_1); + const __m256d tmp_qloop_11 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_0); + const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_3),tmp_qloop_5),tmp_qloop_8); + const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,ux_dof_4),_mm256_mul_pd(tmp_qloop_11,ux_dof_5)),_mm256_mul_pd(tmp_qloop_12,ux_dof_0)),_mm256_mul_pd(tmp_qloop_3,ux_dof_3)),_mm256_mul_pd(tmp_qloop_6,ux_dof_1)),_mm256_mul_pd(tmp_qloop_9,ux_dof_2)); + const __m256d tmp_qloop_14 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,uy_dof_4),_mm256_mul_pd(tmp_qloop_11,uy_dof_5)),_mm256_mul_pd(tmp_qloop_12,uy_dof_0)),_mm256_mul_pd(tmp_qloop_3,uy_dof_3)),_mm256_mul_pd(tmp_qloop_6,uy_dof_1)),_mm256_mul_pd(tmp_qloop_9,uy_dof_2)); + const __m256d tmp_qloop_15 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))))); + const __m256d tmp_qloop_16 = _mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_dof_0,tmp_qloop_12),_mm256_mul_pd(cp_dof_1,tmp_qloop_6)),_mm256_mul_pd(cp_dof_2,tmp_qloop_9)),_mm256_mul_pd(cp_dof_3,tmp_qloop_3)),_mm256_mul_pd(cp_dof_4,tmp_qloop_10)),_mm256_mul_pd(cp_dof_5,tmp_qloop_11)),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)); + const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_12,tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_0); + const __m256d tmp_qloop_19 = 
_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_18),_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_18),_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))); + const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_1); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_20),_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_20),_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))); + const __m256d tmp_qloop_22 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)); + const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)); + const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)); + const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)); + const __m256d tmp_qloop_26 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(tmp_qloop_22,tmp_qloop_23)),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(tmp_qloop_24,tmp_qloop_25))); + const __m256d tmp_qloop_27 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_28 
= _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))))); + const __m256d tmp_qloop_29 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_30 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))))); + const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_6); + const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_9); + const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_3); + const __m256d tmp_qloop_34 = _mm256_mul_pd(tmp_qloop_10,tmp_qloop_16); + const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_11,tmp_qloop_16); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_17); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_19); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_21); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_26); + const __m256d q_tmp_0_4 = 
_mm256_mul_pd(tmp_qloop_17,tmp_qloop_28); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_17,tmp_qloop_30); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_31); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_31); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_31); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_31); + const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_31); + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_31); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_32); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_32); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_32); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_32); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_32); + const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_32); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_33); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_33); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_33); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_33); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_33); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_33); + const __m256d q_tmp_4_0 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_34); + const __m256d q_tmp_4_1 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_34); + const __m256d q_tmp_4_2 = _mm256_mul_pd(tmp_qloop_21,tmp_qloop_34); + const __m256d q_tmp_4_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_34); + const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_34); + const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_34); + const __m256d q_tmp_5_0 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_35); + const __m256d q_tmp_5_1 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_35); + const __m256d q_tmp_5_2 = 
_mm256_mul_pd(tmp_qloop_21,tmp_qloop_35); + const __m256d q_tmp_5_3 = _mm256_mul_pd(tmp_qloop_26,tmp_qloop_35); + const __m256d q_tmp_5_4 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_35); + const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_30,tmp_qloop_35); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0); + q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1); + q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2); + q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3); + q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4); + q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5); + q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0); + q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1); + q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2); + q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3); + q_acc_5_4 = 
_mm256_add_pd(q_acc_5_4,q_tmp_5_4); + q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)); + const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5)); + const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5)); + 
_mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); 
ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + 
macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / 
(2))]; + const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_dof_1 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t cp_dof_3 = _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + 
(ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0; + const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]); + const 
real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q]; + const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_8 = tmp_qloop_7*2.0; + const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0; + const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0; + const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2; + const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2; + const real_t tmp_qloop_15 = tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_2 + jac_affine_inv_1_0_BLUE*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_2 + jac_affine_inv_1_1_BLUE*tmp_qloop_2); + const real_t tmp_qloop_16 = abs_det_jac_affine_BLUE*(cp_dof_0*tmp_qloop_12 + cp_dof_1*tmp_qloop_6 + cp_dof_2*tmp_qloop_9 + cp_dof_3*tmp_qloop_3 + cp_dof_4*tmp_qloop_10 + cp_dof_5*tmp_qloop_11)*_data_q_w[q]; + const real_t tmp_qloop_17 = tmp_qloop_12*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_0 - 1.0; + const real_t tmp_qloop_19 = jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_18 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_18; + const real_t tmp_qloop_20 = tmp_qloop_1 - 1.0; + const real_t tmp_qloop_21 = jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_20 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_20; + const real_t tmp_qloop_22 = jac_affine_inv_1_0_BLUE*tmp_qloop_0; + const real_t tmp_qloop_23 = jac_affine_inv_0_0_BLUE*tmp_qloop_1; + const real_t tmp_qloop_24 = jac_affine_inv_1_1_BLUE*tmp_qloop_0; + const real_t tmp_qloop_25 = jac_affine_inv_0_1_BLUE*tmp_qloop_1; + const real_t 
tmp_qloop_26 = tmp_qloop_13*(tmp_qloop_22 + tmp_qloop_23) + tmp_qloop_14*(tmp_qloop_24 + tmp_qloop_25); + const real_t tmp_qloop_27 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_28 = tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_27 - tmp_qloop_23) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_27 - tmp_qloop_25); + const real_t tmp_qloop_29 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_30 = tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_29 - tmp_qloop_22) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_29 - tmp_qloop_24); + const real_t tmp_qloop_31 = tmp_qloop_16*tmp_qloop_6; + const real_t tmp_qloop_32 = tmp_qloop_16*tmp_qloop_9; + const real_t tmp_qloop_33 = tmp_qloop_16*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_10*tmp_qloop_16; + const real_t tmp_qloop_35 = tmp_qloop_11*tmp_qloop_16; + const real_t q_tmp_0_0 = tmp_qloop_15*tmp_qloop_17; + const real_t q_tmp_0_1 = tmp_qloop_17*tmp_qloop_19; + const real_t q_tmp_0_2 = tmp_qloop_17*tmp_qloop_21; + const real_t q_tmp_0_3 = tmp_qloop_17*tmp_qloop_26; + const real_t q_tmp_0_4 = tmp_qloop_17*tmp_qloop_28; + const real_t q_tmp_0_5 = tmp_qloop_17*tmp_qloop_30; + const real_t q_tmp_1_0 = tmp_qloop_15*tmp_qloop_31; + const real_t q_tmp_1_1 = tmp_qloop_19*tmp_qloop_31; + const real_t q_tmp_1_2 = tmp_qloop_21*tmp_qloop_31; + const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_31; + const real_t q_tmp_1_4 = tmp_qloop_28*tmp_qloop_31; + const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_31; + const real_t q_tmp_2_0 = tmp_qloop_15*tmp_qloop_32; + const real_t q_tmp_2_1 = tmp_qloop_19*tmp_qloop_32; + const real_t q_tmp_2_2 = tmp_qloop_21*tmp_qloop_32; + const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_32; + const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_32; + const real_t q_tmp_2_5 = tmp_qloop_30*tmp_qloop_32; + const real_t q_tmp_3_0 = tmp_qloop_15*tmp_qloop_33; + const real_t q_tmp_3_1 = tmp_qloop_19*tmp_qloop_33; + const real_t q_tmp_3_2 = tmp_qloop_21*tmp_qloop_33; + 
const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_33; + const real_t q_tmp_3_4 = tmp_qloop_28*tmp_qloop_33; + const real_t q_tmp_3_5 = tmp_qloop_30*tmp_qloop_33; + const real_t q_tmp_4_0 = tmp_qloop_15*tmp_qloop_34; + const real_t q_tmp_4_1 = tmp_qloop_19*tmp_qloop_34; + const real_t q_tmp_4_2 = tmp_qloop_21*tmp_qloop_34; + const real_t q_tmp_4_3 = tmp_qloop_26*tmp_qloop_34; + const real_t q_tmp_4_4 = tmp_qloop_28*tmp_qloop_34; + const real_t q_tmp_4_5 = tmp_qloop_30*tmp_qloop_34; + const real_t q_tmp_5_0 = tmp_qloop_15*tmp_qloop_35; + const real_t q_tmp_5_1 = tmp_qloop_19*tmp_qloop_35; + const real_t q_tmp_5_2 = tmp_qloop_21*tmp_qloop_35; + const real_t q_tmp_5_3 = tmp_qloop_26*tmp_qloop_35; + const real_t q_tmp_5_4 = tmp_qloop_28*tmp_qloop_35; + const real_t q_tmp_5_5 = tmp_qloop_30*tmp_qloop_35; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + 
q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5; + const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5; + const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 
2)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + } + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/advection/avx/P2ElementwiseAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseAdvection_macro_2D.cpp b/operators/advection/avx/P2ElementwiseAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseAdvection_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e93c26e5ed0553b97a1a0f40291f94a164b48e83 --- /dev/null +++ b/operators/advection/avx/P2ElementwiseAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseAdvection_macro_2D.cpp @@ -0,0 +1,516 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. 
+* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseAdvection.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseAdvection::computeInverseDiagonalOperatorValues_P2ElementwiseAdvection_macro_2D( real_t * RESTRICT _data_cpEdge, real_t * RESTRICT _data_cpVertex, real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + 
tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + 
_data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& 
_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d cp_dof_0 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d cp_dof_1 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d cp_dof_2 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d cp_dof_3 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d cp_dof_4 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d cp_dof_5 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / 
(2))]); + const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = 
_mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_3),tmp_qloop_5); + const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_8 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_0),tmp_qloop_7); + const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_3); + const __m256d tmp_qloop_10 = 
_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_5); + const __m256d tmp_qloop_11 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_7); + const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_0); + const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_1,ux_dof_3),_mm256_mul_pd(tmp_qloop_10,ux_dof_2)),_mm256_mul_pd(tmp_qloop_11,ux_dof_4)),_mm256_mul_pd(tmp_qloop_12,ux_dof_5)),_mm256_mul_pd(tmp_qloop_6,ux_dof_0)),_mm256_mul_pd(tmp_qloop_9,ux_dof_1)); + const __m256d tmp_qloop_14 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_1,uy_dof_3),_mm256_mul_pd(tmp_qloop_10,uy_dof_2)),_mm256_mul_pd(tmp_qloop_11,uy_dof_4)),_mm256_mul_pd(tmp_qloop_12,uy_dof_5)),_mm256_mul_pd(tmp_qloop_6,uy_dof_0)),_mm256_mul_pd(tmp_qloop_9,uy_dof_1)); + const __m256d tmp_qloop_15 = _mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_dof_0,tmp_qloop_6),_mm256_mul_pd(cp_dof_1,tmp_qloop_9)),_mm256_mul_pd(cp_dof_2,tmp_qloop_10)),_mm256_mul_pd(cp_dof_3,tmp_qloop_1)),_mm256_mul_pd(cp_dof_4,tmp_qloop_11)),_mm256_mul_pd(cp_dof_5,tmp_qloop_12)),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)); + const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_0); + const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_7); + const __m256d tmp_qloop_18 = 
_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)); + const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)); + const __m256d tmp_qloop_21 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d q_tmp_0_0 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_6),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)))))); + const __m256d q_tmp_1_1 = 
_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_9),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_16),_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_16),_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)))); + const __m256d q_tmp_2_2 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_10,tmp_qloop_15),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)))); + const __m256d q_tmp_3_3 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_1,tmp_qloop_15),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(tmp_qloop_18,tmp_qloop_19)),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(tmp_qloop_20,tmp_qloop_21)))); + const __m256d q_tmp_4_4 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_11,tmp_qloop_15),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)))))); + const __m256d q_tmp_5_5 = 
_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_12,tmp_qloop_15),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)))))); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4); + q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5); + } + const __m256d elMatDiag_0 = q_acc_0_0; + const __m256d elMatDiag_1 = q_acc_1_1; + const __m256d elMatDiag_2 = q_acc_2_2; + const __m256d elMatDiag_3 = q_acc_3_3; + const __m256d elMatDiag_4 = q_acc_4_4; + const __m256d elMatDiag_5 = q_acc_5_5; + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] 
+ 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t cp_dof_1 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_3 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_1 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_3 = tmp_qloop_2*2.0; + const real_t tmp_qloop_4 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_1 + tmp_qloop_3 + tmp_qloop_5 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_7 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_8 = tmp_qloop_0 + tmp_qloop_7 - 3.0; + const real_t tmp_qloop_9 = tmp_qloop_3 - _data_q_p_0[q]; + const real_t tmp_qloop_10 = tmp_qloop_5 - _data_q_p_1[q]; + const real_t tmp_qloop_11 = -tmp_qloop_1 + tmp_qloop_4*-4.0 + tmp_qloop_7; + const real_t tmp_qloop_12 = tmp_qloop_0 - tmp_qloop_1 + tmp_qloop_2*-4.0; + const real_t tmp_qloop_13 = tmp_qloop_1*ux_dof_3 + tmp_qloop_10*ux_dof_2 + tmp_qloop_11*ux_dof_4 + tmp_qloop_12*ux_dof_5 + tmp_qloop_6*ux_dof_0 + 
tmp_qloop_9*ux_dof_1; + const real_t tmp_qloop_14 = tmp_qloop_1*uy_dof_3 + tmp_qloop_10*uy_dof_2 + tmp_qloop_11*uy_dof_4 + tmp_qloop_12*uy_dof_5 + tmp_qloop_6*uy_dof_0 + tmp_qloop_9*uy_dof_1; + const real_t tmp_qloop_15 = abs_det_jac_affine_GRAY*(cp_dof_0*tmp_qloop_6 + cp_dof_1*tmp_qloop_9 + cp_dof_2*tmp_qloop_10 + cp_dof_3*tmp_qloop_1 + cp_dof_4*tmp_qloop_11 + cp_dof_5*tmp_qloop_12)*_data_q_w[q]; + const real_t tmp_qloop_16 = tmp_qloop_0 - 1.0; + const real_t tmp_qloop_17 = tmp_qloop_7 - 1.0; + const real_t tmp_qloop_18 = jac_affine_inv_1_0_GRAY*tmp_qloop_0; + const real_t tmp_qloop_19 = jac_affine_inv_0_0_GRAY*tmp_qloop_7; + const real_t tmp_qloop_20 = jac_affine_inv_1_1_GRAY*tmp_qloop_0; + const real_t tmp_qloop_21 = jac_affine_inv_0_1_GRAY*tmp_qloop_7; + const real_t tmp_qloop_22 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_23 = -tmp_qloop_7 - 8.0*_data_q_p_0[q] + 4.0; + const real_t q_tmp_0_0 = tmp_qloop_15*tmp_qloop_6*(tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_8 + jac_affine_inv_1_0_GRAY*tmp_qloop_8) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_8 + jac_affine_inv_1_1_GRAY*tmp_qloop_8)); + const real_t q_tmp_1_1 = tmp_qloop_15*tmp_qloop_9*(jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_16 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_16); + const real_t q_tmp_2_2 = tmp_qloop_10*tmp_qloop_15*(jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_17); + const real_t q_tmp_3_3 = tmp_qloop_1*tmp_qloop_15*(tmp_qloop_13*(tmp_qloop_18 + tmp_qloop_19) + tmp_qloop_14*(tmp_qloop_20 + tmp_qloop_21)); + const real_t q_tmp_4_4 = tmp_qloop_11*tmp_qloop_15*(tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_22 - tmp_qloop_19) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_22 - tmp_qloop_21)); + const real_t q_tmp_5_5 = tmp_qloop_12*tmp_qloop_15*(tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_23 - tmp_qloop_18) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_23 - tmp_qloop_20)); + q_acc_0_0 = 
q_acc_0_0 + q_tmp_0_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatDiag_0 = q_acc_0_0; + const real_t elMatDiag_1 = q_acc_1_1; + const real_t elMatDiag_2 = q_acc_2_2; + const real_t elMatDiag_3 = q_acc_3_3; + const real_t elMatDiag_4 = q_acc_4_4; + const real_t elMatDiag_5 = q_acc_5_5; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + } + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = 
-jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d cp_dof_0 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d cp_dof_1 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d cp_dof_2 = _mm256_loadu_pd(& _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d cp_dof_3 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d cp_dof_4 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d cp_dof_5 = _mm256_loadu_pd(& _data_cpEdge[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const 
__m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_1 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_2 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_1),tmp_qloop_3),tmp_qloop_5); + const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + 
const __m256d tmp_qloop_8 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_0),tmp_qloop_7); + const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_3); + const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_5); + const __m256d tmp_qloop_11 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_7); + const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_0); + const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_1,ux_dof_3),_mm256_mul_pd(tmp_qloop_10,ux_dof_2)),_mm256_mul_pd(tmp_qloop_11,ux_dof_4)),_mm256_mul_pd(tmp_qloop_12,ux_dof_5)),_mm256_mul_pd(tmp_qloop_6,ux_dof_0)),_mm256_mul_pd(tmp_qloop_9,ux_dof_1)); + const __m256d tmp_qloop_14 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_1,uy_dof_3),_mm256_mul_pd(tmp_qloop_10,uy_dof_2)),_mm256_mul_pd(tmp_qloop_11,uy_dof_4)),_mm256_mul_pd(tmp_qloop_12,uy_dof_5)),_mm256_mul_pd(tmp_qloop_6,uy_dof_0)),_mm256_mul_pd(tmp_qloop_9,uy_dof_1)); + const __m256d tmp_qloop_15 = 
_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_dof_0,tmp_qloop_6),_mm256_mul_pd(cp_dof_1,tmp_qloop_9)),_mm256_mul_pd(cp_dof_2,tmp_qloop_10)),_mm256_mul_pd(cp_dof_3,tmp_qloop_1)),_mm256_mul_pd(cp_dof_4,tmp_qloop_11)),_mm256_mul_pd(cp_dof_5,tmp_qloop_12)),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)); + const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_0); + const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_7); + const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)); + const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)); + const __m256d tmp_qloop_21 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d q_tmp_0_0 = 
_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_6),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)))))); + const __m256d q_tmp_1_1 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_9),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_16),_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_16),_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)))); + const __m256d q_tmp_2_2 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_10,tmp_qloop_15),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)))); + const __m256d q_tmp_3_3 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_1,tmp_qloop_15),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(tmp_qloop_18,tmp_qloop_19)),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(tmp_qloop_20,tmp_qloop_21)))); + const __m256d q_tmp_4_4 = 
_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_11,tmp_qloop_15),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)))))); + const __m256d q_tmp_5_5 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_12,tmp_qloop_15),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)))))); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4); + q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5); + } + const __m256d elMatDiag_0 = q_acc_0_0; + const __m256d elMatDiag_1 = q_acc_1_1; + const __m256d elMatDiag_2 = q_acc_2_2; + const __m256d elMatDiag_3 = q_acc_3_3; + const __m256d elMatDiag_4 = q_acc_4_4; + const __m256d elMatDiag_5 = q_acc_5_5; + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + 
_mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; 
+ _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + 
_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_dof_1 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t cp_dof_3 = _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge 
+ 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_1 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_3 = 
tmp_qloop_2*2.0; + const real_t tmp_qloop_4 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_1 + tmp_qloop_3 + tmp_qloop_5 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_7 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_8 = tmp_qloop_0 + tmp_qloop_7 - 3.0; + const real_t tmp_qloop_9 = tmp_qloop_3 - _data_q_p_0[q]; + const real_t tmp_qloop_10 = tmp_qloop_5 - _data_q_p_1[q]; + const real_t tmp_qloop_11 = -tmp_qloop_1 + tmp_qloop_4*-4.0 + tmp_qloop_7; + const real_t tmp_qloop_12 = tmp_qloop_0 - tmp_qloop_1 + tmp_qloop_2*-4.0; + const real_t tmp_qloop_13 = tmp_qloop_1*ux_dof_3 + tmp_qloop_10*ux_dof_2 + tmp_qloop_11*ux_dof_4 + tmp_qloop_12*ux_dof_5 + tmp_qloop_6*ux_dof_0 + tmp_qloop_9*ux_dof_1; + const real_t tmp_qloop_14 = tmp_qloop_1*uy_dof_3 + tmp_qloop_10*uy_dof_2 + tmp_qloop_11*uy_dof_4 + tmp_qloop_12*uy_dof_5 + tmp_qloop_6*uy_dof_0 + tmp_qloop_9*uy_dof_1; + const real_t tmp_qloop_15 = abs_det_jac_affine_BLUE*(cp_dof_0*tmp_qloop_6 + cp_dof_1*tmp_qloop_9 + cp_dof_2*tmp_qloop_10 + cp_dof_3*tmp_qloop_1 + cp_dof_4*tmp_qloop_11 + cp_dof_5*tmp_qloop_12)*_data_q_w[q]; + const real_t tmp_qloop_16 = tmp_qloop_0 - 1.0; + const real_t tmp_qloop_17 = tmp_qloop_7 - 1.0; + const real_t tmp_qloop_18 = jac_affine_inv_1_0_BLUE*tmp_qloop_0; + const real_t tmp_qloop_19 = jac_affine_inv_0_0_BLUE*tmp_qloop_7; + const real_t tmp_qloop_20 = jac_affine_inv_1_1_BLUE*tmp_qloop_0; + const real_t tmp_qloop_21 = jac_affine_inv_0_1_BLUE*tmp_qloop_7; + const real_t tmp_qloop_22 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_23 = -tmp_qloop_7 - 8.0*_data_q_p_0[q] + 4.0; + const real_t q_tmp_0_0 = tmp_qloop_15*tmp_qloop_6*(tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_8 + jac_affine_inv_1_0_BLUE*tmp_qloop_8) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_8 + jac_affine_inv_1_1_BLUE*tmp_qloop_8)); + const real_t q_tmp_1_1 = 
tmp_qloop_15*tmp_qloop_9*(jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_16 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_16); + const real_t q_tmp_2_2 = tmp_qloop_10*tmp_qloop_15*(jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_17); + const real_t q_tmp_3_3 = tmp_qloop_1*tmp_qloop_15*(tmp_qloop_13*(tmp_qloop_18 + tmp_qloop_19) + tmp_qloop_14*(tmp_qloop_20 + tmp_qloop_21)); + const real_t q_tmp_4_4 = tmp_qloop_11*tmp_qloop_15*(tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_22 - tmp_qloop_19) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_22 - tmp_qloop_21)); + const real_t q_tmp_5_5 = tmp_qloop_12*tmp_qloop_15*(tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_23 - tmp_qloop_18) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_23 - tmp_qloop_20)); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatDiag_0 = q_acc_0_0; + const real_t elMatDiag_1 = q_acc_1_1; + const real_t elMatDiag_2 = q_acc_2_2; + const real_t elMatDiag_3 = q_acc_3_3; + const real_t elMatDiag_4 = q_acc_4_4; + const real_t elMatDiag_5 = q_acc_5_5; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + 
_data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + } + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/advection/noarch/P2ElementwiseAdvectionAnnulusMap_apply_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp b/operators/advection/noarch/P2ElementwiseAdvectionAnnulusMap_apply_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1fdbdb498081da0e27987c6f81e906e46e44fed4 --- /dev/null +++ b/operators/advection/noarch/P2ElementwiseAdvectionAnnulusMap_apply_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp @@ -0,0 +1,661 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. 
+* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseAdvectionAnnulusMap.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseAdvectionAnnulusMap::apply_P2ElementwiseAdvectionAnnulusMap_macro_2D( real_t * RESTRICT _data_cpEdge, real_t * RESTRICT _data_cpVertex, real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + 
const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; + const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_8 = -tmp_qloop_7; + const real_t tmp_qloop_9 = 
1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); + const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; + const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_11*1.0; + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + 
_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / 
(2))]; + const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t cp_dof_1 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_3 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 
+ ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + 
p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_39 
= tmp_qloop_37 + tmp_qloop_38 - 3.0; + const real_t tmp_qloop_40 = jac_affine_inv_0_0_GRAY*tmp_qloop_39 + jac_affine_inv_1_0_GRAY*tmp_qloop_39; + const real_t tmp_qloop_41 = jac_affine_inv_0_1_GRAY*tmp_qloop_39 + jac_affine_inv_1_1_GRAY*tmp_qloop_39; + const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_44 = tmp_qloop_43*2.0; + const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q]; + const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_47 = tmp_qloop_46*2.0; + const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q]; + const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0; + const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0; + const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0; + const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0; + const real_t tmp_qloop_57 = tmp_qloop_37 - 1.0; + const real_t tmp_qloop_58 = jac_affine_inv_0_0_GRAY*tmp_qloop_57; + const real_t tmp_qloop_59 = jac_affine_inv_0_1_GRAY*tmp_qloop_57; + const real_t tmp_qloop_61 = tmp_qloop_38 - 1.0; + const real_t tmp_qloop_62 = jac_affine_inv_1_0_GRAY*tmp_qloop_61; + const real_t tmp_qloop_63 = jac_affine_inv_1_1_GRAY*tmp_qloop_61; + const real_t tmp_qloop_65 = jac_affine_inv_1_0_GRAY*tmp_qloop_37; + const real_t tmp_qloop_66 = jac_affine_inv_0_0_GRAY*tmp_qloop_38; + const real_t tmp_qloop_67 = tmp_qloop_65 + tmp_qloop_66; + const real_t tmp_qloop_68 = jac_affine_inv_1_1_GRAY*tmp_qloop_37; + const real_t tmp_qloop_69 = jac_affine_inv_0_1_GRAY*tmp_qloop_38; + const real_t 
tmp_qloop_70 = tmp_qloop_68 + tmp_qloop_69; + const real_t tmp_qloop_72 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_73 = jac_affine_inv_1_0_GRAY*tmp_qloop_72 - tmp_qloop_66; + const real_t tmp_qloop_74 = jac_affine_inv_1_1_GRAY*tmp_qloop_72 - tmp_qloop_69; + const real_t tmp_qloop_76 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_77 = jac_affine_inv_0_0_GRAY*tmp_qloop_76 - tmp_qloop_65; + const real_t tmp_qloop_78 = jac_affine_inv_0_1_GRAY*tmp_qloop_76 - tmp_qloop_68; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_55 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(cp_dof_0*tmp_qloop_51 + cp_dof_1*tmp_qloop_45 + cp_dof_2*tmp_qloop_48 + cp_dof_3*tmp_qloop_42 + cp_dof_4*tmp_qloop_49 + cp_dof_5*tmp_qloop_50)*_data_q_w[q]; + const real_t tmp_qloop_56 = tmp_qloop_51*tmp_qloop_55; + const real_t tmp_qloop_80 = tmp_qloop_45*tmp_qloop_55; + const real_t tmp_qloop_81 = tmp_qloop_48*tmp_qloop_55; + const real_t tmp_qloop_82 = tmp_qloop_42*tmp_qloop_55; + const real_t tmp_qloop_83 = tmp_qloop_49*tmp_qloop_55; + const real_t tmp_qloop_84 = tmp_qloop_50*tmp_qloop_55; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t tmp_qloop_54 = 
tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41); + const real_t tmp_qloop_60 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_58 + jac_blending_inv_1_0*tmp_qloop_59) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_58 + jac_blending_inv_1_1*tmp_qloop_59); + const real_t tmp_qloop_64 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_62 + jac_blending_inv_1_0*tmp_qloop_63) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_62 + jac_blending_inv_1_1*tmp_qloop_63); + const real_t tmp_qloop_71 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_67 + jac_blending_inv_1_0*tmp_qloop_70) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_67 + jac_blending_inv_1_1*tmp_qloop_70); + const real_t tmp_qloop_75 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_73 + jac_blending_inv_1_0*tmp_qloop_74) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_73 + jac_blending_inv_1_1*tmp_qloop_74); + const real_t tmp_qloop_79 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_77 + jac_blending_inv_1_0*tmp_qloop_78) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_77 + jac_blending_inv_1_1*tmp_qloop_78); + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t hessian_blending_0_1_1 = 
tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t q_tmp_0_0 = tmp_qloop_54*tmp_qloop_56; + const real_t q_tmp_0_1 = tmp_qloop_56*tmp_qloop_60; + const real_t q_tmp_0_2 = tmp_qloop_56*tmp_qloop_64; + const real_t q_tmp_0_3 = tmp_qloop_56*tmp_qloop_71; + const real_t q_tmp_0_4 = tmp_qloop_56*tmp_qloop_75; + const real_t q_tmp_0_5 = tmp_qloop_56*tmp_qloop_79; + const real_t q_tmp_1_0 = tmp_qloop_54*tmp_qloop_80; + const real_t q_tmp_1_1 = tmp_qloop_60*tmp_qloop_80; + const real_t q_tmp_1_2 = tmp_qloop_64*tmp_qloop_80; + const real_t q_tmp_1_3 = tmp_qloop_71*tmp_qloop_80; + const real_t q_tmp_1_4 = tmp_qloop_75*tmp_qloop_80; + const real_t q_tmp_1_5 = tmp_qloop_79*tmp_qloop_80; + const real_t q_tmp_2_0 = tmp_qloop_54*tmp_qloop_81; + const real_t q_tmp_2_1 = tmp_qloop_60*tmp_qloop_81; + const real_t q_tmp_2_2 = tmp_qloop_64*tmp_qloop_81; + const real_t q_tmp_2_3 = tmp_qloop_71*tmp_qloop_81; + const real_t q_tmp_2_4 = tmp_qloop_75*tmp_qloop_81; + const real_t q_tmp_2_5 = tmp_qloop_79*tmp_qloop_81; + const real_t q_tmp_3_0 = tmp_qloop_54*tmp_qloop_82; + const real_t q_tmp_3_1 = tmp_qloop_60*tmp_qloop_82; + const real_t q_tmp_3_2 = tmp_qloop_64*tmp_qloop_82; + const real_t q_tmp_3_3 = tmp_qloop_71*tmp_qloop_82; + const real_t q_tmp_3_4 = tmp_qloop_75*tmp_qloop_82; + const real_t q_tmp_3_5 = tmp_qloop_79*tmp_qloop_82; + const real_t q_tmp_4_0 = tmp_qloop_54*tmp_qloop_83; + const real_t q_tmp_4_1 = tmp_qloop_60*tmp_qloop_83; + const real_t q_tmp_4_2 = tmp_qloop_64*tmp_qloop_83; + const real_t q_tmp_4_3 = tmp_qloop_71*tmp_qloop_83; + const real_t q_tmp_4_4 = tmp_qloop_75*tmp_qloop_83; + const real_t q_tmp_4_5 = tmp_qloop_79*tmp_qloop_83; + const real_t q_tmp_5_0 = tmp_qloop_54*tmp_qloop_84; + const 
real_t q_tmp_5_1 = tmp_qloop_60*tmp_qloop_84; + const real_t q_tmp_5_2 = tmp_qloop_64*tmp_qloop_84; + const real_t q_tmp_5_3 = tmp_qloop_71*tmp_qloop_84; + const real_t q_tmp_5_4 = tmp_qloop_75*tmp_qloop_84; + const real_t q_tmp_5_5 = tmp_qloop_79*tmp_qloop_84; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + 
q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5; + const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5; + const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / 
(2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t 
jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + 
macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 
2)) / (2))]; + const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_dof_1 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t cp_dof_3 = _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 
2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t 
q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = 
tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0; + const real_t tmp_qloop_40 = jac_affine_inv_0_0_BLUE*tmp_qloop_39 + jac_affine_inv_1_0_BLUE*tmp_qloop_39; + const real_t tmp_qloop_41 = jac_affine_inv_0_1_BLUE*tmp_qloop_39 + jac_affine_inv_1_1_BLUE*tmp_qloop_39; + const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_44 = tmp_qloop_43*2.0; + const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q]; + const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_47 = tmp_qloop_46*2.0; + const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q]; + const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0; + const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0; + const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0; + const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0; + const real_t tmp_qloop_57 = tmp_qloop_37 - 1.0; + const real_t tmp_qloop_58 = jac_affine_inv_0_0_BLUE*tmp_qloop_57; + const real_t tmp_qloop_59 = jac_affine_inv_0_1_BLUE*tmp_qloop_57; + const real_t tmp_qloop_61 = tmp_qloop_38 - 1.0; + const real_t tmp_qloop_62 = jac_affine_inv_1_0_BLUE*tmp_qloop_61; + const real_t tmp_qloop_63 = jac_affine_inv_1_1_BLUE*tmp_qloop_61; + const real_t tmp_qloop_65 = jac_affine_inv_1_0_BLUE*tmp_qloop_37; + const real_t tmp_qloop_66 = 
jac_affine_inv_0_0_BLUE*tmp_qloop_38; + const real_t tmp_qloop_67 = tmp_qloop_65 + tmp_qloop_66; + const real_t tmp_qloop_68 = jac_affine_inv_1_1_BLUE*tmp_qloop_37; + const real_t tmp_qloop_69 = jac_affine_inv_0_1_BLUE*tmp_qloop_38; + const real_t tmp_qloop_70 = tmp_qloop_68 + tmp_qloop_69; + const real_t tmp_qloop_72 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_73 = jac_affine_inv_1_0_BLUE*tmp_qloop_72 - tmp_qloop_66; + const real_t tmp_qloop_74 = jac_affine_inv_1_1_BLUE*tmp_qloop_72 - tmp_qloop_69; + const real_t tmp_qloop_76 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_77 = jac_affine_inv_0_0_BLUE*tmp_qloop_76 - tmp_qloop_65; + const real_t tmp_qloop_78 = jac_affine_inv_0_1_BLUE*tmp_qloop_76 - tmp_qloop_68; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_55 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(cp_dof_0*tmp_qloop_51 + cp_dof_1*tmp_qloop_45 + cp_dof_2*tmp_qloop_48 + cp_dof_3*tmp_qloop_42 + cp_dof_4*tmp_qloop_49 + cp_dof_5*tmp_qloop_50)*_data_q_w[q]; + const real_t tmp_qloop_56 = tmp_qloop_51*tmp_qloop_55; + const real_t tmp_qloop_80 = tmp_qloop_45*tmp_qloop_55; + const real_t tmp_qloop_81 = tmp_qloop_48*tmp_qloop_55; + const real_t tmp_qloop_82 = tmp_qloop_42*tmp_qloop_55; + const real_t tmp_qloop_83 = tmp_qloop_49*tmp_qloop_55; + const real_t tmp_qloop_84 = tmp_qloop_50*tmp_qloop_55; + const real_t jac_blending_inv_0_0 = 
jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t tmp_qloop_54 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41); + const real_t tmp_qloop_60 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_58 + jac_blending_inv_1_0*tmp_qloop_59) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_58 + jac_blending_inv_1_1*tmp_qloop_59); + const real_t tmp_qloop_64 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_62 + jac_blending_inv_1_0*tmp_qloop_63) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_62 + jac_blending_inv_1_1*tmp_qloop_63); + const real_t tmp_qloop_71 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_67 + jac_blending_inv_1_0*tmp_qloop_70) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_67 + jac_blending_inv_1_1*tmp_qloop_70); + const real_t tmp_qloop_75 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_73 + jac_blending_inv_1_0*tmp_qloop_74) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_73 + jac_blending_inv_1_1*tmp_qloop_74); + const real_t tmp_qloop_79 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_77 + jac_blending_inv_1_0*tmp_qloop_78) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_77 + jac_blending_inv_1_1*tmp_qloop_78); + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = 
tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t q_tmp_0_0 = tmp_qloop_54*tmp_qloop_56; + const real_t q_tmp_0_1 = tmp_qloop_56*tmp_qloop_60; + const real_t q_tmp_0_2 = tmp_qloop_56*tmp_qloop_64; + const real_t q_tmp_0_3 = tmp_qloop_56*tmp_qloop_71; + const real_t q_tmp_0_4 = tmp_qloop_56*tmp_qloop_75; + const real_t q_tmp_0_5 = tmp_qloop_56*tmp_qloop_79; + const real_t q_tmp_1_0 = tmp_qloop_54*tmp_qloop_80; + const real_t q_tmp_1_1 = tmp_qloop_60*tmp_qloop_80; + const real_t q_tmp_1_2 = tmp_qloop_64*tmp_qloop_80; + const real_t q_tmp_1_3 = tmp_qloop_71*tmp_qloop_80; + const real_t q_tmp_1_4 = tmp_qloop_75*tmp_qloop_80; + const real_t q_tmp_1_5 = tmp_qloop_79*tmp_qloop_80; + const real_t q_tmp_2_0 = tmp_qloop_54*tmp_qloop_81; + const real_t q_tmp_2_1 = tmp_qloop_60*tmp_qloop_81; + const real_t q_tmp_2_2 = tmp_qloop_64*tmp_qloop_81; + const real_t q_tmp_2_3 = tmp_qloop_71*tmp_qloop_81; + const real_t q_tmp_2_4 = tmp_qloop_75*tmp_qloop_81; + const real_t q_tmp_2_5 = tmp_qloop_79*tmp_qloop_81; + const real_t q_tmp_3_0 = tmp_qloop_54*tmp_qloop_82; + const real_t q_tmp_3_1 = tmp_qloop_60*tmp_qloop_82; + const real_t q_tmp_3_2 = tmp_qloop_64*tmp_qloop_82; + const real_t q_tmp_3_3 = tmp_qloop_71*tmp_qloop_82; + const real_t q_tmp_3_4 = tmp_qloop_75*tmp_qloop_82; + const real_t q_tmp_3_5 = tmp_qloop_79*tmp_qloop_82; + const real_t q_tmp_4_0 = tmp_qloop_54*tmp_qloop_83; + const real_t q_tmp_4_1 = tmp_qloop_60*tmp_qloop_83; + const real_t q_tmp_4_2 = 
tmp_qloop_64*tmp_qloop_83; + const real_t q_tmp_4_3 = tmp_qloop_71*tmp_qloop_83; + const real_t q_tmp_4_4 = tmp_qloop_75*tmp_qloop_83; + const real_t q_tmp_4_5 = tmp_qloop_79*tmp_qloop_83; + const real_t q_tmp_5_0 = tmp_qloop_54*tmp_qloop_84; + const real_t q_tmp_5_1 = tmp_qloop_60*tmp_qloop_84; + const real_t q_tmp_5_2 = tmp_qloop_64*tmp_qloop_84; + const real_t q_tmp_5_3 = tmp_qloop_71*tmp_qloop_84; + const real_t q_tmp_5_4 = tmp_qloop_75*tmp_qloop_84; + const real_t q_tmp_5_5 = tmp_qloop_79*tmp_qloop_84; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + 
q_acc_0_5*src_dof_5; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5; + const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5; + const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / 
(2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/advection/noarch/P2ElementwiseAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp b/operators/advection/noarch/P2ElementwiseAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b2432dfc1d0fdc704d974e2d0a00c805f8202010 --- /dev/null +++ b/operators/advection/noarch/P2ElementwiseAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp @@ -0,0 +1,445 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. 
+* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseAdvectionAnnulusMap.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseAdvectionAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseAdvectionAnnulusMap_macro_2D( real_t * RESTRICT _data_cpEdge, real_t * RESTRICT _data_cpVertex, real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + 
tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; + const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_8 = -tmp_qloop_7; + const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); + const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; + const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_11*1.0; + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const 
int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t cp_dof_1 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_3 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t 
ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = 
(tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_39 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_40 = tmp_qloop_39*2.0; + const real_t 
tmp_qloop_41 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_42 = tmp_qloop_41*2.0; + const real_t tmp_qloop_43 = tmp_qloop_38 + tmp_qloop_40 + tmp_qloop_42 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_44 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_45 = tmp_qloop_37 + tmp_qloop_44 - 3.0; + const real_t tmp_qloop_46 = jac_affine_inv_0_0_GRAY*tmp_qloop_45 + jac_affine_inv_1_0_GRAY*tmp_qloop_45; + const real_t tmp_qloop_47 = jac_affine_inv_0_1_GRAY*tmp_qloop_45 + jac_affine_inv_1_1_GRAY*tmp_qloop_45; + const real_t tmp_qloop_48 = tmp_qloop_40 - _data_q_p_0[q]; + const real_t tmp_qloop_49 = tmp_qloop_42 - _data_q_p_1[q]; + const real_t tmp_qloop_50 = -tmp_qloop_38 + tmp_qloop_41*-4.0 + tmp_qloop_44; + const real_t tmp_qloop_51 = tmp_qloop_37 - tmp_qloop_38 + tmp_qloop_39*-4.0; + const real_t tmp_qloop_52 = tmp_qloop_38*ux_dof_3 + tmp_qloop_43*ux_dof_0 + tmp_qloop_48*ux_dof_1 + tmp_qloop_49*ux_dof_2 + tmp_qloop_50*ux_dof_4 + tmp_qloop_51*ux_dof_5; + const real_t tmp_qloop_53 = tmp_qloop_38*uy_dof_3 + tmp_qloop_43*uy_dof_0 + tmp_qloop_48*uy_dof_1 + tmp_qloop_49*uy_dof_2 + tmp_qloop_50*uy_dof_4 + tmp_qloop_51*uy_dof_5; + const real_t tmp_qloop_55 = tmp_qloop_37 - 1.0; + const real_t tmp_qloop_56 = jac_affine_inv_0_0_GRAY*tmp_qloop_55; + const real_t tmp_qloop_57 = jac_affine_inv_0_1_GRAY*tmp_qloop_55; + const real_t tmp_qloop_58 = tmp_qloop_44 - 1.0; + const real_t tmp_qloop_59 = jac_affine_inv_1_0_GRAY*tmp_qloop_58; + const real_t tmp_qloop_60 = jac_affine_inv_1_1_GRAY*tmp_qloop_58; + const real_t tmp_qloop_61 = jac_affine_inv_1_0_GRAY*tmp_qloop_37; + const real_t tmp_qloop_62 = jac_affine_inv_0_0_GRAY*tmp_qloop_44; + const real_t tmp_qloop_63 = tmp_qloop_61 + tmp_qloop_62; + const real_t tmp_qloop_64 = jac_affine_inv_1_1_GRAY*tmp_qloop_37; + const real_t tmp_qloop_65 = jac_affine_inv_0_1_GRAY*tmp_qloop_44; + const real_t tmp_qloop_66 = tmp_qloop_64 + tmp_qloop_65; + const real_t tmp_qloop_67 = -tmp_qloop_37 - 
8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_68 = jac_affine_inv_1_0_GRAY*tmp_qloop_67 - tmp_qloop_62; + const real_t tmp_qloop_69 = jac_affine_inv_1_1_GRAY*tmp_qloop_67 - tmp_qloop_65; + const real_t tmp_qloop_70 = -tmp_qloop_44 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_71 = jac_affine_inv_0_0_GRAY*tmp_qloop_70 - tmp_qloop_61; + const real_t tmp_qloop_72 = jac_affine_inv_0_1_GRAY*tmp_qloop_70 - tmp_qloop_64; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_54 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(cp_dof_0*tmp_qloop_43 + cp_dof_1*tmp_qloop_48 + cp_dof_2*tmp_qloop_49 + cp_dof_3*tmp_qloop_38 + cp_dof_4*tmp_qloop_50 + cp_dof_5*tmp_qloop_51)*_data_q_w[q]; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - 
tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t q_tmp_0_0 = tmp_qloop_43*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_46 + jac_blending_inv_1_0*tmp_qloop_47) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_46 + jac_blending_inv_1_1*tmp_qloop_47)); + const real_t q_tmp_1_1 = tmp_qloop_48*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_1_0*tmp_qloop_57) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57)); + const real_t q_tmp_2_2 = tmp_qloop_49*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_59 + jac_blending_inv_1_0*tmp_qloop_60) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_59 + jac_blending_inv_1_1*tmp_qloop_60)); + const real_t q_tmp_3_3 = tmp_qloop_38*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_66) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_66)); + const real_t q_tmp_4_4 = tmp_qloop_50*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_1_0*tmp_qloop_69) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69)); + const real_t q_tmp_5_5 = tmp_qloop_51*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_71 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_71 + 
jac_blending_inv_1_1*tmp_qloop_72)); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatDiag_0 = q_acc_0_0; + const real_t elMatDiag_1 = q_acc_1_1; + const real_t elMatDiag_2 = q_acc_2_2; + const real_t elMatDiag_3 = q_acc_3_3; + const real_t elMatDiag_4 = q_acc_4_4; + const real_t elMatDiag_5 = q_acc_5_5; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / 
(2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const 
real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + 
macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_dof_1 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t cp_dof_3 = _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / 
(2))]; + const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge 
+ 1)) / (2)) + 1]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = 
tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_39 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_40 = tmp_qloop_39*2.0; + const real_t tmp_qloop_41 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_42 = tmp_qloop_41*2.0; + const real_t tmp_qloop_43 = tmp_qloop_38 + tmp_qloop_40 + tmp_qloop_42 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_44 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_45 = tmp_qloop_37 + tmp_qloop_44 - 3.0; + const real_t tmp_qloop_46 = jac_affine_inv_0_0_BLUE*tmp_qloop_45 + jac_affine_inv_1_0_BLUE*tmp_qloop_45; + const real_t tmp_qloop_47 = jac_affine_inv_0_1_BLUE*tmp_qloop_45 + jac_affine_inv_1_1_BLUE*tmp_qloop_45; + const real_t tmp_qloop_48 = tmp_qloop_40 - _data_q_p_0[q]; + const real_t tmp_qloop_49 = tmp_qloop_42 - _data_q_p_1[q]; + const real_t tmp_qloop_50 = -tmp_qloop_38 + tmp_qloop_41*-4.0 + tmp_qloop_44; + const real_t tmp_qloop_51 = tmp_qloop_37 - tmp_qloop_38 + tmp_qloop_39*-4.0; + const real_t tmp_qloop_52 = tmp_qloop_38*ux_dof_3 + tmp_qloop_43*ux_dof_0 + tmp_qloop_48*ux_dof_1 + tmp_qloop_49*ux_dof_2 + tmp_qloop_50*ux_dof_4 + tmp_qloop_51*ux_dof_5; + const real_t tmp_qloop_53 = tmp_qloop_38*uy_dof_3 + tmp_qloop_43*uy_dof_0 + tmp_qloop_48*uy_dof_1 + tmp_qloop_49*uy_dof_2 + tmp_qloop_50*uy_dof_4 + tmp_qloop_51*uy_dof_5; + const real_t tmp_qloop_55 = tmp_qloop_37 - 1.0; + const real_t tmp_qloop_56 = jac_affine_inv_0_0_BLUE*tmp_qloop_55; + const real_t tmp_qloop_57 = jac_affine_inv_0_1_BLUE*tmp_qloop_55; + const real_t tmp_qloop_58 = tmp_qloop_44 - 1.0; + const real_t 
tmp_qloop_59 = jac_affine_inv_1_0_BLUE*tmp_qloop_58; + const real_t tmp_qloop_60 = jac_affine_inv_1_1_BLUE*tmp_qloop_58; + const real_t tmp_qloop_61 = jac_affine_inv_1_0_BLUE*tmp_qloop_37; + const real_t tmp_qloop_62 = jac_affine_inv_0_0_BLUE*tmp_qloop_44; + const real_t tmp_qloop_63 = tmp_qloop_61 + tmp_qloop_62; + const real_t tmp_qloop_64 = jac_affine_inv_1_1_BLUE*tmp_qloop_37; + const real_t tmp_qloop_65 = jac_affine_inv_0_1_BLUE*tmp_qloop_44; + const real_t tmp_qloop_66 = tmp_qloop_64 + tmp_qloop_65; + const real_t tmp_qloop_67 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_68 = jac_affine_inv_1_0_BLUE*tmp_qloop_67 - tmp_qloop_62; + const real_t tmp_qloop_69 = jac_affine_inv_1_1_BLUE*tmp_qloop_67 - tmp_qloop_65; + const real_t tmp_qloop_70 = -tmp_qloop_44 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_71 = jac_affine_inv_0_0_BLUE*tmp_qloop_70 - tmp_qloop_61; + const real_t tmp_qloop_72 = jac_affine_inv_0_1_BLUE*tmp_qloop_70 - tmp_qloop_64; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_54 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(cp_dof_0*tmp_qloop_43 + cp_dof_1*tmp_qloop_48 + cp_dof_2*tmp_qloop_49 + cp_dof_3*tmp_qloop_38 + cp_dof_4*tmp_qloop_50 + cp_dof_5*tmp_qloop_51)*_data_q_w[q]; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = 
-jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t q_tmp_0_0 = tmp_qloop_43*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_46 + jac_blending_inv_1_0*tmp_qloop_47) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_46 + jac_blending_inv_1_1*tmp_qloop_47)); + const real_t q_tmp_1_1 = tmp_qloop_48*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_1_0*tmp_qloop_57) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57)); + const real_t q_tmp_2_2 = tmp_qloop_49*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_59 + jac_blending_inv_1_0*tmp_qloop_60) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_59 + jac_blending_inv_1_1*tmp_qloop_60)); + const real_t q_tmp_3_3 = tmp_qloop_38*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + 
jac_blending_inv_1_0*tmp_qloop_66) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_66)); + const real_t q_tmp_4_4 = tmp_qloop_50*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_1_0*tmp_qloop_69) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69)); + const real_t q_tmp_5_5 = tmp_qloop_51*tmp_qloop_54*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_71 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_71 + jac_blending_inv_1_1*tmp_qloop_72)); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatDiag_0 = q_acc_0_0; + const real_t elMatDiag_1 = q_acc_1_1; + const real_t elMatDiag_2 = q_acc_2_2; + const real_t elMatDiag_3 = q_acc_3_3; + const real_t elMatDiag_4 = q_acc_4_4; + const real_t elMatDiag_5 = q_acc_5_5; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + 
_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/advection/noarch/P2ElementwiseAdvectionAnnulusMap_toMatrix_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp b/operators/advection/noarch/P2ElementwiseAdvectionAnnulusMap_toMatrix_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..82d532646aacee8a0d5747e26cb572f8f8be1a79 --- /dev/null +++ b/operators/advection/noarch/P2ElementwiseAdvectionAnnulusMap_toMatrix_P2ElementwiseAdvectionAnnulusMap_macro_2D.cpp @@ -0,0 +1,819 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. 
+* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseAdvectionAnnulusMap.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseAdvectionAnnulusMap::toMatrix_P2ElementwiseAdvectionAnnulusMap_macro_2D( real_t * RESTRICT _data_cpEdge, real_t * RESTRICT _data_cpVertex, idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = 
macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; + const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_8 = -tmp_qloop_7; + const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); + const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; + const real_t tmp_qloop_11 = 
tmp_qloop_10*tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_11*1.0; + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + 
macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t cp_dof_1 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_3 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 
2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + 
real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t 
tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0; + const real_t tmp_qloop_40 = jac_affine_inv_0_0_GRAY*tmp_qloop_39 + jac_affine_inv_1_0_GRAY*tmp_qloop_39; + const real_t tmp_qloop_41 = jac_affine_inv_0_1_GRAY*tmp_qloop_39 + jac_affine_inv_1_1_GRAY*tmp_qloop_39; + const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_44 = tmp_qloop_43*2.0; + const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q]; + const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_47 = tmp_qloop_46*2.0; + const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q]; + const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0; + const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0; + const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + 
tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0; + const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0; + const real_t tmp_qloop_57 = tmp_qloop_37 - 1.0; + const real_t tmp_qloop_58 = jac_affine_inv_0_0_GRAY*tmp_qloop_57; + const real_t tmp_qloop_59 = jac_affine_inv_0_1_GRAY*tmp_qloop_57; + const real_t tmp_qloop_61 = tmp_qloop_38 - 1.0; + const real_t tmp_qloop_62 = jac_affine_inv_1_0_GRAY*tmp_qloop_61; + const real_t tmp_qloop_63 = jac_affine_inv_1_1_GRAY*tmp_qloop_61; + const real_t tmp_qloop_65 = jac_affine_inv_1_0_GRAY*tmp_qloop_37; + const real_t tmp_qloop_66 = jac_affine_inv_0_0_GRAY*tmp_qloop_38; + const real_t tmp_qloop_67 = tmp_qloop_65 + tmp_qloop_66; + const real_t tmp_qloop_68 = jac_affine_inv_1_1_GRAY*tmp_qloop_37; + const real_t tmp_qloop_69 = jac_affine_inv_0_1_GRAY*tmp_qloop_38; + const real_t tmp_qloop_70 = tmp_qloop_68 + tmp_qloop_69; + const real_t tmp_qloop_72 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_73 = jac_affine_inv_1_0_GRAY*tmp_qloop_72 - tmp_qloop_66; + const real_t tmp_qloop_74 = jac_affine_inv_1_1_GRAY*tmp_qloop_72 - tmp_qloop_69; + const real_t tmp_qloop_76 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_77 = jac_affine_inv_0_0_GRAY*tmp_qloop_76 - tmp_qloop_65; + const real_t tmp_qloop_78 = jac_affine_inv_0_1_GRAY*tmp_qloop_76 - tmp_qloop_68; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + 
const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_55 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(cp_dof_0*tmp_qloop_51 + cp_dof_1*tmp_qloop_45 + cp_dof_2*tmp_qloop_48 + cp_dof_3*tmp_qloop_42 + cp_dof_4*tmp_qloop_49 + cp_dof_5*tmp_qloop_50)*_data_q_w[q]; + const real_t tmp_qloop_56 = tmp_qloop_51*tmp_qloop_55; + const real_t tmp_qloop_80 = tmp_qloop_45*tmp_qloop_55; + const real_t tmp_qloop_81 = tmp_qloop_48*tmp_qloop_55; + const real_t tmp_qloop_82 = tmp_qloop_42*tmp_qloop_55; + const real_t tmp_qloop_83 = tmp_qloop_49*tmp_qloop_55; + const real_t tmp_qloop_84 = tmp_qloop_50*tmp_qloop_55; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t tmp_qloop_54 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41); + const real_t tmp_qloop_60 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_58 + jac_blending_inv_1_0*tmp_qloop_59) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_58 + jac_blending_inv_1_1*tmp_qloop_59); + const real_t tmp_qloop_64 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_62 + jac_blending_inv_1_0*tmp_qloop_63) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_62 + jac_blending_inv_1_1*tmp_qloop_63); + const real_t tmp_qloop_71 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_67 + jac_blending_inv_1_0*tmp_qloop_70) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_67 + jac_blending_inv_1_1*tmp_qloop_70); + const real_t tmp_qloop_75 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_73 + jac_blending_inv_1_0*tmp_qloop_74) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_73 + jac_blending_inv_1_1*tmp_qloop_74); + const real_t tmp_qloop_79 = 
tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_77 + jac_blending_inv_1_0*tmp_qloop_78) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_77 + jac_blending_inv_1_1*tmp_qloop_78); + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t q_tmp_0_0 = tmp_qloop_54*tmp_qloop_56; + const real_t q_tmp_0_1 = tmp_qloop_56*tmp_qloop_60; + const real_t q_tmp_0_2 = tmp_qloop_56*tmp_qloop_64; + const real_t q_tmp_0_3 = tmp_qloop_56*tmp_qloop_71; + const real_t q_tmp_0_4 = tmp_qloop_56*tmp_qloop_75; + const real_t q_tmp_0_5 = tmp_qloop_56*tmp_qloop_79; + const real_t q_tmp_1_0 = tmp_qloop_54*tmp_qloop_80; + const real_t q_tmp_1_1 = tmp_qloop_60*tmp_qloop_80; + const real_t q_tmp_1_2 = tmp_qloop_64*tmp_qloop_80; + const real_t q_tmp_1_3 = tmp_qloop_71*tmp_qloop_80; + const real_t q_tmp_1_4 = tmp_qloop_75*tmp_qloop_80; + const real_t q_tmp_1_5 = tmp_qloop_79*tmp_qloop_80; + const real_t q_tmp_2_0 = tmp_qloop_54*tmp_qloop_81; + const real_t 
q_tmp_2_1 = tmp_qloop_60*tmp_qloop_81; + const real_t q_tmp_2_2 = tmp_qloop_64*tmp_qloop_81; + const real_t q_tmp_2_3 = tmp_qloop_71*tmp_qloop_81; + const real_t q_tmp_2_4 = tmp_qloop_75*tmp_qloop_81; + const real_t q_tmp_2_5 = tmp_qloop_79*tmp_qloop_81; + const real_t q_tmp_3_0 = tmp_qloop_54*tmp_qloop_82; + const real_t q_tmp_3_1 = tmp_qloop_60*tmp_qloop_82; + const real_t q_tmp_3_2 = tmp_qloop_64*tmp_qloop_82; + const real_t q_tmp_3_3 = tmp_qloop_71*tmp_qloop_82; + const real_t q_tmp_3_4 = tmp_qloop_75*tmp_qloop_82; + const real_t q_tmp_3_5 = tmp_qloop_79*tmp_qloop_82; + const real_t q_tmp_4_0 = tmp_qloop_54*tmp_qloop_83; + const real_t q_tmp_4_1 = tmp_qloop_60*tmp_qloop_83; + const real_t q_tmp_4_2 = tmp_qloop_64*tmp_qloop_83; + const real_t q_tmp_4_3 = tmp_qloop_71*tmp_qloop_83; + const real_t q_tmp_4_4 = tmp_qloop_75*tmp_qloop_83; + const real_t q_tmp_4_5 = tmp_qloop_79*tmp_qloop_83; + const real_t q_tmp_5_0 = tmp_qloop_54*tmp_qloop_84; + const real_t q_tmp_5_1 = tmp_qloop_60*tmp_qloop_84; + const real_t q_tmp_5_2 = tmp_qloop_64*tmp_qloop_84; + const real_t q_tmp_5_3 = tmp_qloop_71*tmp_qloop_84; + const real_t q_tmp_5_4 = tmp_qloop_75*tmp_qloop_84; + const real_t q_tmp_5_5 = tmp_qloop_79*tmp_qloop_84; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + 
q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMat_0_0 = q_acc_0_0; + const real_t elMat_0_1 = q_acc_0_1; + const real_t elMat_0_2 = q_acc_0_2; + const real_t elMat_0_3 = q_acc_0_3; + const real_t elMat_0_4 = q_acc_0_4; + const real_t elMat_0_5 = q_acc_0_5; + const real_t elMat_1_0 = q_acc_1_0; + const real_t elMat_1_1 = q_acc_1_1; + const real_t elMat_1_2 = q_acc_1_2; + const real_t elMat_1_3 = q_acc_1_3; + const real_t elMat_1_4 = q_acc_1_4; + const real_t elMat_1_5 = q_acc_1_5; + const real_t elMat_2_0 = q_acc_2_0; + const real_t elMat_2_1 = q_acc_2_1; + const real_t elMat_2_2 = q_acc_2_2; + const real_t elMat_2_3 = q_acc_2_3; + const real_t elMat_2_4 = q_acc_2_4; + const real_t elMat_2_5 = q_acc_2_5; + const real_t elMat_3_0 = q_acc_3_0; + const real_t elMat_3_1 = q_acc_3_1; + const real_t elMat_3_2 = q_acc_3_2; + const real_t elMat_3_3 = q_acc_3_3; + const real_t elMat_3_4 = q_acc_3_4; + const real_t elMat_3_5 = q_acc_3_5; + const real_t elMat_4_0 = q_acc_4_0; + const real_t elMat_4_1 = q_acc_4_1; + const real_t elMat_4_2 = q_acc_4_2; + const real_t elMat_4_3 = q_acc_4_3; + const real_t elMat_4_4 = q_acc_4_4; + const real_t elMat_4_5 = q_acc_4_5; + const real_t elMat_5_0 = q_acc_5_0; + const real_t elMat_5_1 = q_acc_5_1; + const real_t elMat_5_2 = q_acc_5_2; + const real_t elMat_5_3 = q_acc_5_3; + const real_t elMat_5_4 = q_acc_5_4; + const real_t elMat_5_5 = q_acc_5_5; + + std::vector< uint_t > _data_rowIdx( 6 ); + std::vector< 
uint_t > _data_colIdx( 6 ); + std::vector< real_t > _data_mat( 36 ); + + _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])); + _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])); + _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])); + _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])); + _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])); + _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])); + + /* Apply basis transformation */ + + + + _data_mat[0] = 
((real_t)(elMat_0_0)); + _data_mat[1] = ((real_t)(elMat_0_1)); + _data_mat[2] = ((real_t)(elMat_0_2)); + _data_mat[3] = ((real_t)(elMat_0_3)); + _data_mat[4] = ((real_t)(elMat_0_4)); + _data_mat[5] = ((real_t)(elMat_0_5)); + _data_mat[6] = ((real_t)(elMat_1_0)); + _data_mat[7] = ((real_t)(elMat_1_1)); + _data_mat[8] = ((real_t)(elMat_1_2)); + _data_mat[9] = ((real_t)(elMat_1_3)); + _data_mat[10] = ((real_t)(elMat_1_4)); + _data_mat[11] = ((real_t)(elMat_1_5)); + _data_mat[12] = ((real_t)(elMat_2_0)); + _data_mat[13] = ((real_t)(elMat_2_1)); + _data_mat[14] = ((real_t)(elMat_2_2)); + _data_mat[15] = ((real_t)(elMat_2_3)); + _data_mat[16] = ((real_t)(elMat_2_4)); + _data_mat[17] = ((real_t)(elMat_2_5)); + _data_mat[18] = ((real_t)(elMat_3_0)); + _data_mat[19] = ((real_t)(elMat_3_1)); + _data_mat[20] = ((real_t)(elMat_3_2)); + _data_mat[21] = ((real_t)(elMat_3_3)); + _data_mat[22] = ((real_t)(elMat_3_4)); + _data_mat[23] = ((real_t)(elMat_3_5)); + _data_mat[24] = ((real_t)(elMat_4_0)); + _data_mat[25] = ((real_t)(elMat_4_1)); + _data_mat[26] = ((real_t)(elMat_4_2)); + _data_mat[27] = ((real_t)(elMat_4_3)); + _data_mat[28] = ((real_t)(elMat_4_4)); + _data_mat[29] = ((real_t)(elMat_4_5)); + _data_mat[30] = ((real_t)(elMat_5_0)); + _data_mat[31] = ((real_t)(elMat_5_1)); + _data_mat[32] = ((real_t)(elMat_5_2)); + _data_mat[33] = ((real_t)(elMat_5_3)); + _data_mat[34] = ((real_t)(elMat_5_4)); + _data_mat[35] = ((real_t)(elMat_5_5)); + + + mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = 
tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + 
_data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + 
_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_dof_1 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t cp_dof_3 = _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge 
+ 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + 
real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = 
tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0; + const real_t tmp_qloop_40 = jac_affine_inv_0_0_BLUE*tmp_qloop_39 + jac_affine_inv_1_0_BLUE*tmp_qloop_39; + const real_t tmp_qloop_41 = jac_affine_inv_0_1_BLUE*tmp_qloop_39 + jac_affine_inv_1_1_BLUE*tmp_qloop_39; + const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_44 = tmp_qloop_43*2.0; + const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q]; + const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_47 = tmp_qloop_46*2.0; + const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q]; + const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0; + const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0; + const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0; + const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0; + const real_t tmp_qloop_57 = tmp_qloop_37 - 1.0; + const real_t 
tmp_qloop_58 = jac_affine_inv_0_0_BLUE*tmp_qloop_57; + const real_t tmp_qloop_59 = jac_affine_inv_0_1_BLUE*tmp_qloop_57; + const real_t tmp_qloop_61 = tmp_qloop_38 - 1.0; + const real_t tmp_qloop_62 = jac_affine_inv_1_0_BLUE*tmp_qloop_61; + const real_t tmp_qloop_63 = jac_affine_inv_1_1_BLUE*tmp_qloop_61; + const real_t tmp_qloop_65 = jac_affine_inv_1_0_BLUE*tmp_qloop_37; + const real_t tmp_qloop_66 = jac_affine_inv_0_0_BLUE*tmp_qloop_38; + const real_t tmp_qloop_67 = tmp_qloop_65 + tmp_qloop_66; + const real_t tmp_qloop_68 = jac_affine_inv_1_1_BLUE*tmp_qloop_37; + const real_t tmp_qloop_69 = jac_affine_inv_0_1_BLUE*tmp_qloop_38; + const real_t tmp_qloop_70 = tmp_qloop_68 + tmp_qloop_69; + const real_t tmp_qloop_72 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_73 = jac_affine_inv_1_0_BLUE*tmp_qloop_72 - tmp_qloop_66; + const real_t tmp_qloop_74 = jac_affine_inv_1_1_BLUE*tmp_qloop_72 - tmp_qloop_69; + const real_t tmp_qloop_76 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_77 = jac_affine_inv_0_0_BLUE*tmp_qloop_76 - tmp_qloop_65; + const real_t tmp_qloop_78 = jac_affine_inv_0_1_BLUE*tmp_qloop_76 - tmp_qloop_68; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_55 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(cp_dof_0*tmp_qloop_51 + cp_dof_1*tmp_qloop_45 + cp_dof_2*tmp_qloop_48 + cp_dof_3*tmp_qloop_42 + cp_dof_4*tmp_qloop_49 + 
cp_dof_5*tmp_qloop_50)*_data_q_w[q]; + const real_t tmp_qloop_56 = tmp_qloop_51*tmp_qloop_55; + const real_t tmp_qloop_80 = tmp_qloop_45*tmp_qloop_55; + const real_t tmp_qloop_81 = tmp_qloop_48*tmp_qloop_55; + const real_t tmp_qloop_82 = tmp_qloop_42*tmp_qloop_55; + const real_t tmp_qloop_83 = tmp_qloop_49*tmp_qloop_55; + const real_t tmp_qloop_84 = tmp_qloop_50*tmp_qloop_55; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t tmp_qloop_54 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41); + const real_t tmp_qloop_60 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_58 + jac_blending_inv_1_0*tmp_qloop_59) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_58 + jac_blending_inv_1_1*tmp_qloop_59); + const real_t tmp_qloop_64 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_62 + jac_blending_inv_1_0*tmp_qloop_63) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_62 + jac_blending_inv_1_1*tmp_qloop_63); + const real_t tmp_qloop_71 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_67 + jac_blending_inv_1_0*tmp_qloop_70) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_67 + jac_blending_inv_1_1*tmp_qloop_70); + const real_t tmp_qloop_75 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_73 + jac_blending_inv_1_0*tmp_qloop_74) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_73 + jac_blending_inv_1_1*tmp_qloop_74); + const real_t tmp_qloop_79 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_77 + jac_blending_inv_1_0*tmp_qloop_78) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_77 + jac_blending_inv_1_1*tmp_qloop_78); + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = 
-tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t q_tmp_0_0 = tmp_qloop_54*tmp_qloop_56; + const real_t q_tmp_0_1 = tmp_qloop_56*tmp_qloop_60; + const real_t q_tmp_0_2 = tmp_qloop_56*tmp_qloop_64; + const real_t q_tmp_0_3 = tmp_qloop_56*tmp_qloop_71; + const real_t q_tmp_0_4 = tmp_qloop_56*tmp_qloop_75; + const real_t q_tmp_0_5 = tmp_qloop_56*tmp_qloop_79; + const real_t q_tmp_1_0 = tmp_qloop_54*tmp_qloop_80; + const real_t q_tmp_1_1 = tmp_qloop_60*tmp_qloop_80; + const real_t q_tmp_1_2 = tmp_qloop_64*tmp_qloop_80; + const real_t q_tmp_1_3 = tmp_qloop_71*tmp_qloop_80; + const real_t q_tmp_1_4 = tmp_qloop_75*tmp_qloop_80; + const real_t q_tmp_1_5 = tmp_qloop_79*tmp_qloop_80; + const real_t q_tmp_2_0 = tmp_qloop_54*tmp_qloop_81; + const real_t q_tmp_2_1 = tmp_qloop_60*tmp_qloop_81; + const real_t q_tmp_2_2 = tmp_qloop_64*tmp_qloop_81; + const real_t q_tmp_2_3 = tmp_qloop_71*tmp_qloop_81; + const real_t q_tmp_2_4 = tmp_qloop_75*tmp_qloop_81; + const real_t q_tmp_2_5 = tmp_qloop_79*tmp_qloop_81; + const real_t q_tmp_3_0 = 
tmp_qloop_54*tmp_qloop_82; + const real_t q_tmp_3_1 = tmp_qloop_60*tmp_qloop_82; + const real_t q_tmp_3_2 = tmp_qloop_64*tmp_qloop_82; + const real_t q_tmp_3_3 = tmp_qloop_71*tmp_qloop_82; + const real_t q_tmp_3_4 = tmp_qloop_75*tmp_qloop_82; + const real_t q_tmp_3_5 = tmp_qloop_79*tmp_qloop_82; + const real_t q_tmp_4_0 = tmp_qloop_54*tmp_qloop_83; + const real_t q_tmp_4_1 = tmp_qloop_60*tmp_qloop_83; + const real_t q_tmp_4_2 = tmp_qloop_64*tmp_qloop_83; + const real_t q_tmp_4_3 = tmp_qloop_71*tmp_qloop_83; + const real_t q_tmp_4_4 = tmp_qloop_75*tmp_qloop_83; + const real_t q_tmp_4_5 = tmp_qloop_79*tmp_qloop_83; + const real_t q_tmp_5_0 = tmp_qloop_54*tmp_qloop_84; + const real_t q_tmp_5_1 = tmp_qloop_60*tmp_qloop_84; + const real_t q_tmp_5_2 = tmp_qloop_64*tmp_qloop_84; + const real_t q_tmp_5_3 = tmp_qloop_71*tmp_qloop_84; + const real_t q_tmp_5_4 = tmp_qloop_75*tmp_qloop_84; + const real_t q_tmp_5_5 = tmp_qloop_79*tmp_qloop_84; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = 
q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMat_0_0 = q_acc_0_0; + const real_t elMat_0_1 = q_acc_0_1; + const real_t elMat_0_2 = q_acc_0_2; + const real_t elMat_0_3 = q_acc_0_3; + const real_t elMat_0_4 = q_acc_0_4; + const real_t elMat_0_5 = q_acc_0_5; + const real_t elMat_1_0 = q_acc_1_0; + const real_t elMat_1_1 = q_acc_1_1; + const real_t elMat_1_2 = q_acc_1_2; + const real_t elMat_1_3 = q_acc_1_3; + const real_t elMat_1_4 = q_acc_1_4; + const real_t elMat_1_5 = q_acc_1_5; + const real_t elMat_2_0 = q_acc_2_0; + const real_t elMat_2_1 = q_acc_2_1; + const real_t elMat_2_2 = q_acc_2_2; + const real_t elMat_2_3 = q_acc_2_3; + const real_t elMat_2_4 = q_acc_2_4; + const real_t elMat_2_5 = q_acc_2_5; + const real_t elMat_3_0 = q_acc_3_0; + const real_t elMat_3_1 = q_acc_3_1; + const real_t elMat_3_2 = q_acc_3_2; + const real_t elMat_3_3 = q_acc_3_3; + const real_t elMat_3_4 = q_acc_3_4; + const real_t elMat_3_5 = q_acc_3_5; + const real_t elMat_4_0 = q_acc_4_0; + const real_t elMat_4_1 = q_acc_4_1; + const real_t elMat_4_2 = q_acc_4_2; + const real_t elMat_4_3 = q_acc_4_3; + const real_t elMat_4_4 = q_acc_4_4; + const real_t elMat_4_5 = q_acc_4_5; + const real_t elMat_5_0 = q_acc_5_0; + const real_t elMat_5_1 = q_acc_5_1; + const real_t elMat_5_2 = q_acc_5_2; + const real_t elMat_5_3 = q_acc_5_3; + const real_t elMat_5_4 = q_acc_5_4; + const real_t elMat_5_5 = q_acc_5_5; + + std::vector< uint_t > _data_rowIdx( 6 ); + std::vector< uint_t > _data_colIdx( 6 ); + std::vector< real_t > _data_mat( 36 ); + + _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])); + _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 
1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])); + _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])); + _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])); + _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])); + _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])); + _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + + /* Apply basis transformation */ + + + + _data_mat[0] = ((real_t)(elMat_0_0)); + _data_mat[1] = ((real_t)(elMat_0_1)); + _data_mat[2] = ((real_t)(elMat_0_2)); + _data_mat[3] = ((real_t)(elMat_0_3)); + _data_mat[4] = ((real_t)(elMat_0_4)); + _data_mat[5] = 
((real_t)(elMat_0_5)); + _data_mat[6] = ((real_t)(elMat_1_0)); + _data_mat[7] = ((real_t)(elMat_1_1)); + _data_mat[8] = ((real_t)(elMat_1_2)); + _data_mat[9] = ((real_t)(elMat_1_3)); + _data_mat[10] = ((real_t)(elMat_1_4)); + _data_mat[11] = ((real_t)(elMat_1_5)); + _data_mat[12] = ((real_t)(elMat_2_0)); + _data_mat[13] = ((real_t)(elMat_2_1)); + _data_mat[14] = ((real_t)(elMat_2_2)); + _data_mat[15] = ((real_t)(elMat_2_3)); + _data_mat[16] = ((real_t)(elMat_2_4)); + _data_mat[17] = ((real_t)(elMat_2_5)); + _data_mat[18] = ((real_t)(elMat_3_0)); + _data_mat[19] = ((real_t)(elMat_3_1)); + _data_mat[20] = ((real_t)(elMat_3_2)); + _data_mat[21] = ((real_t)(elMat_3_3)); + _data_mat[22] = ((real_t)(elMat_3_4)); + _data_mat[23] = ((real_t)(elMat_3_5)); + _data_mat[24] = ((real_t)(elMat_4_0)); + _data_mat[25] = ((real_t)(elMat_4_1)); + _data_mat[26] = ((real_t)(elMat_4_2)); + _data_mat[27] = ((real_t)(elMat_4_3)); + _data_mat[28] = ((real_t)(elMat_4_4)); + _data_mat[29] = ((real_t)(elMat_4_5)); + _data_mat[30] = ((real_t)(elMat_5_0)); + _data_mat[31] = ((real_t)(elMat_5_1)); + _data_mat[32] = ((real_t)(elMat_5_2)); + _data_mat[33] = ((real_t)(elMat_5_3)); + _data_mat[34] = ((real_t)(elMat_5_4)); + _data_mat[35] = ((real_t)(elMat_5_5)); + + + mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/advection/noarch/P2ElementwiseAdvection_apply_P2ElementwiseAdvection_macro_2D.cpp b/operators/advection/noarch/P2ElementwiseAdvection_apply_P2ElementwiseAdvection_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4e303a9e31626d1266b498a1ce2faee2367ff3b5 --- /dev/null +++ b/operators/advection/noarch/P2ElementwiseAdvection_apply_P2ElementwiseAdvection_macro_2D.cpp @@ -0,0 +1,534 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). 
+* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseAdvection.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseAdvection::apply_P2ElementwiseAdvection_macro_2D( real_t * RESTRICT _data_cpEdge, real_t * RESTRICT _data_cpVertex, real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 
0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t 
ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + 
macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t cp_dof_0 = _data_cpVertex[ctr_0 
+ ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t cp_dof_1 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_3 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + 
(ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0; + const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q]; + const real_t tmp_qloop_7 = 
(_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_8 = tmp_qloop_7*2.0; + const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0; + const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0; + const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2; + const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2; + const real_t tmp_qloop_15 = tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_0_GRAY*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_2); + const real_t tmp_qloop_16 = abs_det_jac_affine_GRAY*(cp_dof_0*tmp_qloop_12 + cp_dof_1*tmp_qloop_6 + cp_dof_2*tmp_qloop_9 + cp_dof_3*tmp_qloop_3 + cp_dof_4*tmp_qloop_10 + cp_dof_5*tmp_qloop_11)*_data_q_w[q]; + const real_t tmp_qloop_17 = tmp_qloop_12*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_0 - 1.0; + const real_t tmp_qloop_19 = jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_18 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_18; + const real_t tmp_qloop_20 = tmp_qloop_1 - 1.0; + const real_t tmp_qloop_21 = jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_20 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_20; + const real_t tmp_qloop_22 = jac_affine_inv_1_0_GRAY*tmp_qloop_0; + const real_t tmp_qloop_23 = jac_affine_inv_0_0_GRAY*tmp_qloop_1; + const real_t tmp_qloop_24 = jac_affine_inv_1_1_GRAY*tmp_qloop_0; + const real_t tmp_qloop_25 = jac_affine_inv_0_1_GRAY*tmp_qloop_1; + const real_t tmp_qloop_26 = tmp_qloop_13*(tmp_qloop_22 + tmp_qloop_23) + tmp_qloop_14*(tmp_qloop_24 + tmp_qloop_25); + const real_t 
tmp_qloop_27 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_28 = tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_27 - tmp_qloop_23) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_27 - tmp_qloop_25); + const real_t tmp_qloop_29 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_30 = tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_29 - tmp_qloop_22) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_29 - tmp_qloop_24); + const real_t tmp_qloop_31 = tmp_qloop_16*tmp_qloop_6; + const real_t tmp_qloop_32 = tmp_qloop_16*tmp_qloop_9; + const real_t tmp_qloop_33 = tmp_qloop_16*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_10*tmp_qloop_16; + const real_t tmp_qloop_35 = tmp_qloop_11*tmp_qloop_16; + const real_t q_tmp_0_0 = tmp_qloop_15*tmp_qloop_17; + const real_t q_tmp_0_1 = tmp_qloop_17*tmp_qloop_19; + const real_t q_tmp_0_2 = tmp_qloop_17*tmp_qloop_21; + const real_t q_tmp_0_3 = tmp_qloop_17*tmp_qloop_26; + const real_t q_tmp_0_4 = tmp_qloop_17*tmp_qloop_28; + const real_t q_tmp_0_5 = tmp_qloop_17*tmp_qloop_30; + const real_t q_tmp_1_0 = tmp_qloop_15*tmp_qloop_31; + const real_t q_tmp_1_1 = tmp_qloop_19*tmp_qloop_31; + const real_t q_tmp_1_2 = tmp_qloop_21*tmp_qloop_31; + const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_31; + const real_t q_tmp_1_4 = tmp_qloop_28*tmp_qloop_31; + const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_31; + const real_t q_tmp_2_0 = tmp_qloop_15*tmp_qloop_32; + const real_t q_tmp_2_1 = tmp_qloop_19*tmp_qloop_32; + const real_t q_tmp_2_2 = tmp_qloop_21*tmp_qloop_32; + const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_32; + const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_32; + const real_t q_tmp_2_5 = tmp_qloop_30*tmp_qloop_32; + const real_t q_tmp_3_0 = tmp_qloop_15*tmp_qloop_33; + const real_t q_tmp_3_1 = tmp_qloop_19*tmp_qloop_33; + const real_t q_tmp_3_2 = tmp_qloop_21*tmp_qloop_33; + const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_33; + const real_t q_tmp_3_4 = tmp_qloop_28*tmp_qloop_33; + const real_t 
q_tmp_3_5 = tmp_qloop_30*tmp_qloop_33; + const real_t q_tmp_4_0 = tmp_qloop_15*tmp_qloop_34; + const real_t q_tmp_4_1 = tmp_qloop_19*tmp_qloop_34; + const real_t q_tmp_4_2 = tmp_qloop_21*tmp_qloop_34; + const real_t q_tmp_4_3 = tmp_qloop_26*tmp_qloop_34; + const real_t q_tmp_4_4 = tmp_qloop_28*tmp_qloop_34; + const real_t q_tmp_4_5 = tmp_qloop_30*tmp_qloop_34; + const real_t q_tmp_5_0 = tmp_qloop_15*tmp_qloop_35; + const real_t q_tmp_5_1 = tmp_qloop_19*tmp_qloop_35; + const real_t q_tmp_5_2 = tmp_qloop_21*tmp_qloop_35; + const real_t q_tmp_5_3 = tmp_qloop_26*tmp_qloop_35; + const real_t q_tmp_5_4 = tmp_qloop_28*tmp_qloop_35; + const real_t q_tmp_5_5 = tmp_qloop_30*tmp_qloop_35; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = 
q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5; + const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5; + const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_dstEdge[ctr_0 
+ ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = 
-p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + 
macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / 
(2)) + 1]; + const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_dof_1 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t cp_dof_3 = _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_2 = 
_data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 
0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0; + const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q]; + const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_8 = tmp_qloop_7*2.0; + const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0; + const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0; + const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2; + const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2; + const real_t tmp_qloop_15 = tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_2 + jac_affine_inv_1_0_BLUE*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_2 + jac_affine_inv_1_1_BLUE*tmp_qloop_2); + const real_t tmp_qloop_16 = abs_det_jac_affine_BLUE*(cp_dof_0*tmp_qloop_12 + cp_dof_1*tmp_qloop_6 + 
cp_dof_2*tmp_qloop_9 + cp_dof_3*tmp_qloop_3 + cp_dof_4*tmp_qloop_10 + cp_dof_5*tmp_qloop_11)*_data_q_w[q]; + const real_t tmp_qloop_17 = tmp_qloop_12*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_0 - 1.0; + const real_t tmp_qloop_19 = jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_18 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_18; + const real_t tmp_qloop_20 = tmp_qloop_1 - 1.0; + const real_t tmp_qloop_21 = jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_20 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_20; + const real_t tmp_qloop_22 = jac_affine_inv_1_0_BLUE*tmp_qloop_0; + const real_t tmp_qloop_23 = jac_affine_inv_0_0_BLUE*tmp_qloop_1; + const real_t tmp_qloop_24 = jac_affine_inv_1_1_BLUE*tmp_qloop_0; + const real_t tmp_qloop_25 = jac_affine_inv_0_1_BLUE*tmp_qloop_1; + const real_t tmp_qloop_26 = tmp_qloop_13*(tmp_qloop_22 + tmp_qloop_23) + tmp_qloop_14*(tmp_qloop_24 + tmp_qloop_25); + const real_t tmp_qloop_27 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_28 = tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_27 - tmp_qloop_23) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_27 - tmp_qloop_25); + const real_t tmp_qloop_29 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_30 = tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_29 - tmp_qloop_22) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_29 - tmp_qloop_24); + const real_t tmp_qloop_31 = tmp_qloop_16*tmp_qloop_6; + const real_t tmp_qloop_32 = tmp_qloop_16*tmp_qloop_9; + const real_t tmp_qloop_33 = tmp_qloop_16*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_10*tmp_qloop_16; + const real_t tmp_qloop_35 = tmp_qloop_11*tmp_qloop_16; + const real_t q_tmp_0_0 = tmp_qloop_15*tmp_qloop_17; + const real_t q_tmp_0_1 = tmp_qloop_17*tmp_qloop_19; + const real_t q_tmp_0_2 = tmp_qloop_17*tmp_qloop_21; + const real_t q_tmp_0_3 = tmp_qloop_17*tmp_qloop_26; + const real_t q_tmp_0_4 = tmp_qloop_17*tmp_qloop_28; + const real_t q_tmp_0_5 = tmp_qloop_17*tmp_qloop_30; + 
const real_t q_tmp_1_0 = tmp_qloop_15*tmp_qloop_31; + const real_t q_tmp_1_1 = tmp_qloop_19*tmp_qloop_31; + const real_t q_tmp_1_2 = tmp_qloop_21*tmp_qloop_31; + const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_31; + const real_t q_tmp_1_4 = tmp_qloop_28*tmp_qloop_31; + const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_31; + const real_t q_tmp_2_0 = tmp_qloop_15*tmp_qloop_32; + const real_t q_tmp_2_1 = tmp_qloop_19*tmp_qloop_32; + const real_t q_tmp_2_2 = tmp_qloop_21*tmp_qloop_32; + const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_32; + const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_32; + const real_t q_tmp_2_5 = tmp_qloop_30*tmp_qloop_32; + const real_t q_tmp_3_0 = tmp_qloop_15*tmp_qloop_33; + const real_t q_tmp_3_1 = tmp_qloop_19*tmp_qloop_33; + const real_t q_tmp_3_2 = tmp_qloop_21*tmp_qloop_33; + const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_33; + const real_t q_tmp_3_4 = tmp_qloop_28*tmp_qloop_33; + const real_t q_tmp_3_5 = tmp_qloop_30*tmp_qloop_33; + const real_t q_tmp_4_0 = tmp_qloop_15*tmp_qloop_34; + const real_t q_tmp_4_1 = tmp_qloop_19*tmp_qloop_34; + const real_t q_tmp_4_2 = tmp_qloop_21*tmp_qloop_34; + const real_t q_tmp_4_3 = tmp_qloop_26*tmp_qloop_34; + const real_t q_tmp_4_4 = tmp_qloop_28*tmp_qloop_34; + const real_t q_tmp_4_5 = tmp_qloop_30*tmp_qloop_34; + const real_t q_tmp_5_0 = tmp_qloop_15*tmp_qloop_35; + const real_t q_tmp_5_1 = tmp_qloop_19*tmp_qloop_35; + const real_t q_tmp_5_2 = tmp_qloop_21*tmp_qloop_35; + const real_t q_tmp_5_3 = tmp_qloop_26*tmp_qloop_35; + const real_t q_tmp_5_4 = tmp_qloop_28*tmp_qloop_35; + const real_t q_tmp_5_5 = tmp_qloop_30*tmp_qloop_35; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 
= q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5; + const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5; + const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / 
(2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/advection/noarch/P2ElementwiseAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseAdvection_macro_2D.cpp b/operators/advection/noarch/P2ElementwiseAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseAdvection_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..737154b859f434fc6b415c420a64cd1014ea0ef0 --- /dev/null +++ 
b/operators/advection/noarch/P2ElementwiseAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseAdvection_macro_2D.cpp @@ -0,0 +1,318 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. 
+*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseAdvection.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseAdvection::computeInverseDiagonalOperatorValues_P2ElementwiseAdvection_macro_2D( real_t * RESTRICT _data_cpEdge, real_t * RESTRICT _data_cpVertex, real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + 
tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + 
macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + 
macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t cp_dof_1 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_3 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - 
((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_1 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_3 = tmp_qloop_2*2.0; + const real_t tmp_qloop_4 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_1 + tmp_qloop_3 + tmp_qloop_5 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_7 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_8 = tmp_qloop_0 + tmp_qloop_7 - 3.0; + const real_t tmp_qloop_9 = tmp_qloop_3 - _data_q_p_0[q]; + const real_t tmp_qloop_10 = tmp_qloop_5 - _data_q_p_1[q]; + const real_t tmp_qloop_11 = -tmp_qloop_1 + tmp_qloop_4*-4.0 + tmp_qloop_7; + const real_t tmp_qloop_12 = tmp_qloop_0 - tmp_qloop_1 + tmp_qloop_2*-4.0; + const real_t tmp_qloop_13 = tmp_qloop_1*ux_dof_3 + 
tmp_qloop_10*ux_dof_2 + tmp_qloop_11*ux_dof_4 + tmp_qloop_12*ux_dof_5 + tmp_qloop_6*ux_dof_0 + tmp_qloop_9*ux_dof_1; + const real_t tmp_qloop_14 = tmp_qloop_1*uy_dof_3 + tmp_qloop_10*uy_dof_2 + tmp_qloop_11*uy_dof_4 + tmp_qloop_12*uy_dof_5 + tmp_qloop_6*uy_dof_0 + tmp_qloop_9*uy_dof_1; + const real_t tmp_qloop_15 = abs_det_jac_affine_GRAY*(cp_dof_0*tmp_qloop_6 + cp_dof_1*tmp_qloop_9 + cp_dof_2*tmp_qloop_10 + cp_dof_3*tmp_qloop_1 + cp_dof_4*tmp_qloop_11 + cp_dof_5*tmp_qloop_12)*_data_q_w[q]; + const real_t tmp_qloop_16 = tmp_qloop_0 - 1.0; + const real_t tmp_qloop_17 = tmp_qloop_7 - 1.0; + const real_t tmp_qloop_18 = jac_affine_inv_1_0_GRAY*tmp_qloop_0; + const real_t tmp_qloop_19 = jac_affine_inv_0_0_GRAY*tmp_qloop_7; + const real_t tmp_qloop_20 = jac_affine_inv_1_1_GRAY*tmp_qloop_0; + const real_t tmp_qloop_21 = jac_affine_inv_0_1_GRAY*tmp_qloop_7; + const real_t tmp_qloop_22 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_23 = -tmp_qloop_7 - 8.0*_data_q_p_0[q] + 4.0; + const real_t q_tmp_0_0 = tmp_qloop_15*tmp_qloop_6*(tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_8 + jac_affine_inv_1_0_GRAY*tmp_qloop_8) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_8 + jac_affine_inv_1_1_GRAY*tmp_qloop_8)); + const real_t q_tmp_1_1 = tmp_qloop_15*tmp_qloop_9*(jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_16 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_16); + const real_t q_tmp_2_2 = tmp_qloop_10*tmp_qloop_15*(jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_17); + const real_t q_tmp_3_3 = tmp_qloop_1*tmp_qloop_15*(tmp_qloop_13*(tmp_qloop_18 + tmp_qloop_19) + tmp_qloop_14*(tmp_qloop_20 + tmp_qloop_21)); + const real_t q_tmp_4_4 = tmp_qloop_11*tmp_qloop_15*(tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_22 - tmp_qloop_19) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_22 - tmp_qloop_21)); + const real_t q_tmp_5_5 = tmp_qloop_12*tmp_qloop_15*(tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_23 - 
tmp_qloop_18) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_23 - tmp_qloop_20)); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatDiag_0 = q_acc_0_0; + const real_t elMatDiag_1 = q_acc_1_1; + const real_t elMatDiag_2 = q_acc_2_2; + const real_t elMatDiag_3 = q_acc_3_3; + const real_t elMatDiag_4 = q_acc_4_4; + const real_t elMatDiag_5 = q_acc_5_5; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_invDiag_Edge[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t 
jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / 
(micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_dof_1 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t cp_dof_3 = _data_cpEdge[ctr_0 + (ctr_1 + 
1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) 
/ (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_1 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_3 = tmp_qloop_2*2.0; + const real_t tmp_qloop_4 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_1 + tmp_qloop_3 + tmp_qloop_5 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_7 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_8 = tmp_qloop_0 + tmp_qloop_7 - 3.0; + const real_t tmp_qloop_9 = tmp_qloop_3 - _data_q_p_0[q]; + const real_t tmp_qloop_10 = tmp_qloop_5 - _data_q_p_1[q]; + const real_t tmp_qloop_11 = -tmp_qloop_1 + tmp_qloop_4*-4.0 + tmp_qloop_7; + const real_t tmp_qloop_12 = tmp_qloop_0 - tmp_qloop_1 + tmp_qloop_2*-4.0; + const real_t tmp_qloop_13 = tmp_qloop_1*ux_dof_3 + tmp_qloop_10*ux_dof_2 + tmp_qloop_11*ux_dof_4 + tmp_qloop_12*ux_dof_5 + tmp_qloop_6*ux_dof_0 + tmp_qloop_9*ux_dof_1; + const real_t tmp_qloop_14 = tmp_qloop_1*uy_dof_3 + tmp_qloop_10*uy_dof_2 + tmp_qloop_11*uy_dof_4 + tmp_qloop_12*uy_dof_5 + tmp_qloop_6*uy_dof_0 + tmp_qloop_9*uy_dof_1; + const real_t tmp_qloop_15 = abs_det_jac_affine_BLUE*(cp_dof_0*tmp_qloop_6 + cp_dof_1*tmp_qloop_9 + cp_dof_2*tmp_qloop_10 + cp_dof_3*tmp_qloop_1 + cp_dof_4*tmp_qloop_11 + cp_dof_5*tmp_qloop_12)*_data_q_w[q]; + const real_t tmp_qloop_16 = tmp_qloop_0 - 1.0; + const real_t tmp_qloop_17 = tmp_qloop_7 - 1.0; + const real_t tmp_qloop_18 = 
jac_affine_inv_1_0_BLUE*tmp_qloop_0; + const real_t tmp_qloop_19 = jac_affine_inv_0_0_BLUE*tmp_qloop_7; + const real_t tmp_qloop_20 = jac_affine_inv_1_1_BLUE*tmp_qloop_0; + const real_t tmp_qloop_21 = jac_affine_inv_0_1_BLUE*tmp_qloop_7; + const real_t tmp_qloop_22 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_23 = -tmp_qloop_7 - 8.0*_data_q_p_0[q] + 4.0; + const real_t q_tmp_0_0 = tmp_qloop_15*tmp_qloop_6*(tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_8 + jac_affine_inv_1_0_BLUE*tmp_qloop_8) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_8 + jac_affine_inv_1_1_BLUE*tmp_qloop_8)); + const real_t q_tmp_1_1 = tmp_qloop_15*tmp_qloop_9*(jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_16 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_16); + const real_t q_tmp_2_2 = tmp_qloop_10*tmp_qloop_15*(jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_17); + const real_t q_tmp_3_3 = tmp_qloop_1*tmp_qloop_15*(tmp_qloop_13*(tmp_qloop_18 + tmp_qloop_19) + tmp_qloop_14*(tmp_qloop_20 + tmp_qloop_21)); + const real_t q_tmp_4_4 = tmp_qloop_11*tmp_qloop_15*(tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_22 - tmp_qloop_19) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_22 - tmp_qloop_21)); + const real_t q_tmp_5_5 = tmp_qloop_12*tmp_qloop_15*(tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_23 - tmp_qloop_18) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_23 - tmp_qloop_20)); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatDiag_0 = q_acc_0_0; + const real_t elMatDiag_1 = q_acc_1_1; + const real_t elMatDiag_2 = q_acc_2_2; + const real_t elMatDiag_3 = q_acc_3_3; + const real_t elMatDiag_4 = q_acc_4_4; + const real_t elMatDiag_5 = q_acc_5_5; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - 
((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/advection/noarch/P2ElementwiseAdvection_toMatrix_P2ElementwiseAdvection_macro_2D.cpp b/operators/advection/noarch/P2ElementwiseAdvection_toMatrix_P2ElementwiseAdvection_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..108b0d54ad22c8eea9c977062ed2f70efe9153c7 --- 
/dev/null +++ b/operators/advection/noarch/P2ElementwiseAdvection_toMatrix_P2ElementwiseAdvection_macro_2D.cpp @@ -0,0 +1,692 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. 
+*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseAdvection.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseAdvection::toMatrix_P2ElementwiseAdvection_macro_2D( real_t * RESTRICT _data_cpEdge, real_t * RESTRICT _data_cpVertex, idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const 
real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / 
(micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / 
(micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t cp_dof_1 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_3 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_5 = 
_data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + 
const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0; + const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q]; + const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_8 = tmp_qloop_7*2.0; + const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0; + const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0; + const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2; + const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2; + const real_t tmp_qloop_15 = tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_0_GRAY*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_2); + const real_t tmp_qloop_16 = abs_det_jac_affine_GRAY*(cp_dof_0*tmp_qloop_12 + cp_dof_1*tmp_qloop_6 + cp_dof_2*tmp_qloop_9 + cp_dof_3*tmp_qloop_3 + cp_dof_4*tmp_qloop_10 + cp_dof_5*tmp_qloop_11)*_data_q_w[q]; + const real_t tmp_qloop_17 = tmp_qloop_12*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_0 - 1.0; + const real_t tmp_qloop_19 = jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_18 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_18; + const real_t tmp_qloop_20 = tmp_qloop_1 - 1.0; + const real_t tmp_qloop_21 = jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_20 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_20; + const real_t tmp_qloop_22 = 
jac_affine_inv_1_0_GRAY*tmp_qloop_0; + const real_t tmp_qloop_23 = jac_affine_inv_0_0_GRAY*tmp_qloop_1; + const real_t tmp_qloop_24 = jac_affine_inv_1_1_GRAY*tmp_qloop_0; + const real_t tmp_qloop_25 = jac_affine_inv_0_1_GRAY*tmp_qloop_1; + const real_t tmp_qloop_26 = tmp_qloop_13*(tmp_qloop_22 + tmp_qloop_23) + tmp_qloop_14*(tmp_qloop_24 + tmp_qloop_25); + const real_t tmp_qloop_27 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_28 = tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_27 - tmp_qloop_23) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_27 - tmp_qloop_25); + const real_t tmp_qloop_29 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_30 = tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_29 - tmp_qloop_22) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_29 - tmp_qloop_24); + const real_t tmp_qloop_31 = tmp_qloop_16*tmp_qloop_6; + const real_t tmp_qloop_32 = tmp_qloop_16*tmp_qloop_9; + const real_t tmp_qloop_33 = tmp_qloop_16*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_10*tmp_qloop_16; + const real_t tmp_qloop_35 = tmp_qloop_11*tmp_qloop_16; + const real_t q_tmp_0_0 = tmp_qloop_15*tmp_qloop_17; + const real_t q_tmp_0_1 = tmp_qloop_17*tmp_qloop_19; + const real_t q_tmp_0_2 = tmp_qloop_17*tmp_qloop_21; + const real_t q_tmp_0_3 = tmp_qloop_17*tmp_qloop_26; + const real_t q_tmp_0_4 = tmp_qloop_17*tmp_qloop_28; + const real_t q_tmp_0_5 = tmp_qloop_17*tmp_qloop_30; + const real_t q_tmp_1_0 = tmp_qloop_15*tmp_qloop_31; + const real_t q_tmp_1_1 = tmp_qloop_19*tmp_qloop_31; + const real_t q_tmp_1_2 = tmp_qloop_21*tmp_qloop_31; + const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_31; + const real_t q_tmp_1_4 = tmp_qloop_28*tmp_qloop_31; + const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_31; + const real_t q_tmp_2_0 = tmp_qloop_15*tmp_qloop_32; + const real_t q_tmp_2_1 = tmp_qloop_19*tmp_qloop_32; + const real_t q_tmp_2_2 = tmp_qloop_21*tmp_qloop_32; + const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_32; + const real_t 
q_tmp_2_4 = tmp_qloop_28*tmp_qloop_32; + const real_t q_tmp_2_5 = tmp_qloop_30*tmp_qloop_32; + const real_t q_tmp_3_0 = tmp_qloop_15*tmp_qloop_33; + const real_t q_tmp_3_1 = tmp_qloop_19*tmp_qloop_33; + const real_t q_tmp_3_2 = tmp_qloop_21*tmp_qloop_33; + const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_33; + const real_t q_tmp_3_4 = tmp_qloop_28*tmp_qloop_33; + const real_t q_tmp_3_5 = tmp_qloop_30*tmp_qloop_33; + const real_t q_tmp_4_0 = tmp_qloop_15*tmp_qloop_34; + const real_t q_tmp_4_1 = tmp_qloop_19*tmp_qloop_34; + const real_t q_tmp_4_2 = tmp_qloop_21*tmp_qloop_34; + const real_t q_tmp_4_3 = tmp_qloop_26*tmp_qloop_34; + const real_t q_tmp_4_4 = tmp_qloop_28*tmp_qloop_34; + const real_t q_tmp_4_5 = tmp_qloop_30*tmp_qloop_34; + const real_t q_tmp_5_0 = tmp_qloop_15*tmp_qloop_35; + const real_t q_tmp_5_1 = tmp_qloop_19*tmp_qloop_35; + const real_t q_tmp_5_2 = tmp_qloop_21*tmp_qloop_35; + const real_t q_tmp_5_3 = tmp_qloop_26*tmp_qloop_35; + const real_t q_tmp_5_4 = tmp_qloop_28*tmp_qloop_35; + const real_t q_tmp_5_5 = tmp_qloop_30*tmp_qloop_35; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + 
q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMat_0_0 = q_acc_0_0; + const real_t elMat_0_1 = q_acc_0_1; + const real_t elMat_0_2 = q_acc_0_2; + const real_t elMat_0_3 = q_acc_0_3; + const real_t elMat_0_4 = q_acc_0_4; + const real_t elMat_0_5 = q_acc_0_5; + const real_t elMat_1_0 = q_acc_1_0; + const real_t elMat_1_1 = q_acc_1_1; + const real_t elMat_1_2 = q_acc_1_2; + const real_t elMat_1_3 = q_acc_1_3; + const real_t elMat_1_4 = q_acc_1_4; + const real_t elMat_1_5 = q_acc_1_5; + const real_t elMat_2_0 = q_acc_2_0; + const real_t elMat_2_1 = q_acc_2_1; + const real_t elMat_2_2 = q_acc_2_2; + const real_t elMat_2_3 = q_acc_2_3; + const real_t elMat_2_4 = q_acc_2_4; + const real_t elMat_2_5 = q_acc_2_5; + const real_t elMat_3_0 = q_acc_3_0; + const real_t elMat_3_1 = q_acc_3_1; + const real_t elMat_3_2 = q_acc_3_2; + const real_t elMat_3_3 = q_acc_3_3; + const real_t elMat_3_4 = q_acc_3_4; + const real_t elMat_3_5 = q_acc_3_5; + const real_t elMat_4_0 = q_acc_4_0; + const real_t elMat_4_1 = q_acc_4_1; + const real_t elMat_4_2 = q_acc_4_2; + const real_t elMat_4_3 = q_acc_4_3; + const real_t elMat_4_4 = q_acc_4_4; + const real_t elMat_4_5 = q_acc_4_5; + const real_t elMat_5_0 = q_acc_5_0; + const real_t elMat_5_1 = q_acc_5_1; + const real_t elMat_5_2 = q_acc_5_2; + const real_t elMat_5_3 = q_acc_5_3; + const real_t elMat_5_4 = q_acc_5_4; + const real_t elMat_5_5 = q_acc_5_5; + + std::vector< uint_t > _data_rowIdx( 6 ); + std::vector< uint_t > _data_colIdx( 6 ); + std::vector< real_t > _data_mat( 36 ); + + _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 
2) - ((ctr_1*(ctr_1 + 1)) / (2))])); + _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])); + _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])); + _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])); + _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])); + _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])); + + /* Apply basis transformation */ + + + + _data_mat[0] = ((real_t)(elMat_0_0)); + _data_mat[1] = ((real_t)(elMat_0_1)); + _data_mat[2] = ((real_t)(elMat_0_2)); + _data_mat[3] = ((real_t)(elMat_0_3)); + _data_mat[4] = 
((real_t)(elMat_0_4)); + _data_mat[5] = ((real_t)(elMat_0_5)); + _data_mat[6] = ((real_t)(elMat_1_0)); + _data_mat[7] = ((real_t)(elMat_1_1)); + _data_mat[8] = ((real_t)(elMat_1_2)); + _data_mat[9] = ((real_t)(elMat_1_3)); + _data_mat[10] = ((real_t)(elMat_1_4)); + _data_mat[11] = ((real_t)(elMat_1_5)); + _data_mat[12] = ((real_t)(elMat_2_0)); + _data_mat[13] = ((real_t)(elMat_2_1)); + _data_mat[14] = ((real_t)(elMat_2_2)); + _data_mat[15] = ((real_t)(elMat_2_3)); + _data_mat[16] = ((real_t)(elMat_2_4)); + _data_mat[17] = ((real_t)(elMat_2_5)); + _data_mat[18] = ((real_t)(elMat_3_0)); + _data_mat[19] = ((real_t)(elMat_3_1)); + _data_mat[20] = ((real_t)(elMat_3_2)); + _data_mat[21] = ((real_t)(elMat_3_3)); + _data_mat[22] = ((real_t)(elMat_3_4)); + _data_mat[23] = ((real_t)(elMat_3_5)); + _data_mat[24] = ((real_t)(elMat_4_0)); + _data_mat[25] = ((real_t)(elMat_4_1)); + _data_mat[26] = ((real_t)(elMat_4_2)); + _data_mat[27] = ((real_t)(elMat_4_3)); + _data_mat[28] = ((real_t)(elMat_4_4)); + _data_mat[29] = ((real_t)(elMat_4_5)); + _data_mat[30] = ((real_t)(elMat_5_0)); + _data_mat[31] = ((real_t)(elMat_5_1)); + _data_mat[32] = ((real_t)(elMat_5_2)); + _data_mat[33] = ((real_t)(elMat_5_3)); + _data_mat[34] = ((real_t)(elMat_5_4)); + _data_mat[35] = ((real_t)(elMat_5_5)); + + + mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + 
macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + 
real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / 
(micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t cp_dof_0 = _data_cpVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_dof_1 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_2 = _data_cpVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t cp_dof_3 = _data_cpEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_dof_4 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t cp_dof_5 = _data_cpEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 
1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + 
real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0; + const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q]; + const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_8 = tmp_qloop_7*2.0; + const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0; + const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0; + const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2; + const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2; + const real_t tmp_qloop_15 = tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_2 + jac_affine_inv_1_0_BLUE*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_2 + jac_affine_inv_1_1_BLUE*tmp_qloop_2); + const real_t tmp_qloop_16 = abs_det_jac_affine_BLUE*(cp_dof_0*tmp_qloop_12 + cp_dof_1*tmp_qloop_6 + cp_dof_2*tmp_qloop_9 + cp_dof_3*tmp_qloop_3 
+ cp_dof_4*tmp_qloop_10 + cp_dof_5*tmp_qloop_11)*_data_q_w[q]; + const real_t tmp_qloop_17 = tmp_qloop_12*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_0 - 1.0; + const real_t tmp_qloop_19 = jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_18 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_18; + const real_t tmp_qloop_20 = tmp_qloop_1 - 1.0; + const real_t tmp_qloop_21 = jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_20 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_20; + const real_t tmp_qloop_22 = jac_affine_inv_1_0_BLUE*tmp_qloop_0; + const real_t tmp_qloop_23 = jac_affine_inv_0_0_BLUE*tmp_qloop_1; + const real_t tmp_qloop_24 = jac_affine_inv_1_1_BLUE*tmp_qloop_0; + const real_t tmp_qloop_25 = jac_affine_inv_0_1_BLUE*tmp_qloop_1; + const real_t tmp_qloop_26 = tmp_qloop_13*(tmp_qloop_22 + tmp_qloop_23) + tmp_qloop_14*(tmp_qloop_24 + tmp_qloop_25); + const real_t tmp_qloop_27 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_28 = tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_27 - tmp_qloop_23) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_27 - tmp_qloop_25); + const real_t tmp_qloop_29 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_30 = tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_29 - tmp_qloop_22) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_29 - tmp_qloop_24); + const real_t tmp_qloop_31 = tmp_qloop_16*tmp_qloop_6; + const real_t tmp_qloop_32 = tmp_qloop_16*tmp_qloop_9; + const real_t tmp_qloop_33 = tmp_qloop_16*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_10*tmp_qloop_16; + const real_t tmp_qloop_35 = tmp_qloop_11*tmp_qloop_16; + const real_t q_tmp_0_0 = tmp_qloop_15*tmp_qloop_17; + const real_t q_tmp_0_1 = tmp_qloop_17*tmp_qloop_19; + const real_t q_tmp_0_2 = tmp_qloop_17*tmp_qloop_21; + const real_t q_tmp_0_3 = tmp_qloop_17*tmp_qloop_26; + const real_t q_tmp_0_4 = tmp_qloop_17*tmp_qloop_28; + const real_t q_tmp_0_5 = tmp_qloop_17*tmp_qloop_30; + const real_t q_tmp_1_0 = 
tmp_qloop_15*tmp_qloop_31; + const real_t q_tmp_1_1 = tmp_qloop_19*tmp_qloop_31; + const real_t q_tmp_1_2 = tmp_qloop_21*tmp_qloop_31; + const real_t q_tmp_1_3 = tmp_qloop_26*tmp_qloop_31; + const real_t q_tmp_1_4 = tmp_qloop_28*tmp_qloop_31; + const real_t q_tmp_1_5 = tmp_qloop_30*tmp_qloop_31; + const real_t q_tmp_2_0 = tmp_qloop_15*tmp_qloop_32; + const real_t q_tmp_2_1 = tmp_qloop_19*tmp_qloop_32; + const real_t q_tmp_2_2 = tmp_qloop_21*tmp_qloop_32; + const real_t q_tmp_2_3 = tmp_qloop_26*tmp_qloop_32; + const real_t q_tmp_2_4 = tmp_qloop_28*tmp_qloop_32; + const real_t q_tmp_2_5 = tmp_qloop_30*tmp_qloop_32; + const real_t q_tmp_3_0 = tmp_qloop_15*tmp_qloop_33; + const real_t q_tmp_3_1 = tmp_qloop_19*tmp_qloop_33; + const real_t q_tmp_3_2 = tmp_qloop_21*tmp_qloop_33; + const real_t q_tmp_3_3 = tmp_qloop_26*tmp_qloop_33; + const real_t q_tmp_3_4 = tmp_qloop_28*tmp_qloop_33; + const real_t q_tmp_3_5 = tmp_qloop_30*tmp_qloop_33; + const real_t q_tmp_4_0 = tmp_qloop_15*tmp_qloop_34; + const real_t q_tmp_4_1 = tmp_qloop_19*tmp_qloop_34; + const real_t q_tmp_4_2 = tmp_qloop_21*tmp_qloop_34; + const real_t q_tmp_4_3 = tmp_qloop_26*tmp_qloop_34; + const real_t q_tmp_4_4 = tmp_qloop_28*tmp_qloop_34; + const real_t q_tmp_4_5 = tmp_qloop_30*tmp_qloop_34; + const real_t q_tmp_5_0 = tmp_qloop_15*tmp_qloop_35; + const real_t q_tmp_5_1 = tmp_qloop_19*tmp_qloop_35; + const real_t q_tmp_5_2 = tmp_qloop_21*tmp_qloop_35; + const real_t q_tmp_5_3 = tmp_qloop_26*tmp_qloop_35; + const real_t q_tmp_5_4 = tmp_qloop_28*tmp_qloop_35; + const real_t q_tmp_5_5 = tmp_qloop_30*tmp_qloop_35; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; 
+ q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMat_0_0 = q_acc_0_0; + const real_t elMat_0_1 = q_acc_0_1; + const real_t elMat_0_2 = q_acc_0_2; + const real_t elMat_0_3 = q_acc_0_3; + const real_t elMat_0_4 = q_acc_0_4; + const real_t elMat_0_5 = q_acc_0_5; + const real_t elMat_1_0 = q_acc_1_0; + const real_t elMat_1_1 = q_acc_1_1; + const real_t elMat_1_2 = q_acc_1_2; + const real_t elMat_1_3 = q_acc_1_3; + const real_t elMat_1_4 = q_acc_1_4; + const real_t elMat_1_5 = q_acc_1_5; + const real_t elMat_2_0 = q_acc_2_0; + const real_t elMat_2_1 = q_acc_2_1; + const real_t elMat_2_2 = q_acc_2_2; + const real_t elMat_2_3 = q_acc_2_3; + const real_t elMat_2_4 = q_acc_2_4; + const real_t elMat_2_5 = q_acc_2_5; + const real_t elMat_3_0 = q_acc_3_0; + const real_t elMat_3_1 = q_acc_3_1; + const real_t elMat_3_2 = q_acc_3_2; + const real_t elMat_3_3 = q_acc_3_3; + const real_t elMat_3_4 = q_acc_3_4; + const real_t elMat_3_5 = q_acc_3_5; + const real_t elMat_4_0 = q_acc_4_0; + const real_t elMat_4_1 = q_acc_4_1; + const real_t elMat_4_2 = q_acc_4_2; + const real_t elMat_4_3 = q_acc_4_3; + 
const real_t elMat_4_4 = q_acc_4_4; + const real_t elMat_4_5 = q_acc_4_5; + const real_t elMat_5_0 = q_acc_5_0; + const real_t elMat_5_1 = q_acc_5_1; + const real_t elMat_5_2 = q_acc_5_2; + const real_t elMat_5_3 = q_acc_5_3; + const real_t elMat_5_4 = q_acc_5_4; + const real_t elMat_5_5 = q_acc_5_5; + + std::vector< uint_t > _data_rowIdx( 6 ); + std::vector< uint_t > _data_colIdx( 6 ); + std::vector< real_t > _data_mat( 36 ); + + _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])); + _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])); + _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])); + _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])); + _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])); + _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_colIdx[4] = 
((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])); + _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + + /* Apply basis transformation */ + + + + _data_mat[0] = ((real_t)(elMat_0_0)); + _data_mat[1] = ((real_t)(elMat_0_1)); + _data_mat[2] = ((real_t)(elMat_0_2)); + _data_mat[3] = ((real_t)(elMat_0_3)); + _data_mat[4] = ((real_t)(elMat_0_4)); + _data_mat[5] = ((real_t)(elMat_0_5)); + _data_mat[6] = ((real_t)(elMat_1_0)); + _data_mat[7] = ((real_t)(elMat_1_1)); + _data_mat[8] = ((real_t)(elMat_1_2)); + _data_mat[9] = ((real_t)(elMat_1_3)); + _data_mat[10] = ((real_t)(elMat_1_4)); + _data_mat[11] = ((real_t)(elMat_1_5)); + _data_mat[12] = ((real_t)(elMat_2_0)); + _data_mat[13] = ((real_t)(elMat_2_1)); + _data_mat[14] = ((real_t)(elMat_2_2)); + _data_mat[15] = ((real_t)(elMat_2_3)); + _data_mat[16] = ((real_t)(elMat_2_4)); + _data_mat[17] = ((real_t)(elMat_2_5)); + _data_mat[18] = ((real_t)(elMat_3_0)); + _data_mat[19] = ((real_t)(elMat_3_1)); + _data_mat[20] = ((real_t)(elMat_3_2)); + _data_mat[21] = ((real_t)(elMat_3_3)); + _data_mat[22] = ((real_t)(elMat_3_4)); + _data_mat[23] = ((real_t)(elMat_3_5)); + _data_mat[24] = ((real_t)(elMat_4_0)); + _data_mat[25] = ((real_t)(elMat_4_1)); + _data_mat[26] = ((real_t)(elMat_4_2)); + _data_mat[27] = ((real_t)(elMat_4_3)); + _data_mat[28] = ((real_t)(elMat_4_4)); + _data_mat[29] = ((real_t)(elMat_4_5)); + _data_mat[30] = ((real_t)(elMat_5_0)); + _data_mat[31] = ((real_t)(elMat_5_1)); + _data_mat[32] = ((real_t)(elMat_5_2)); + _data_mat[33] = ((real_t)(elMat_5_3)); + _data_mat[34] = ((real_t)(elMat_5_4)); + _data_mat[35] = ((real_t)(elMat_5_5)); + + + mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); + } + } + } +} +} // namespace 
operatorgeneration + +} // namespace hyteg diff --git a/operators/supg_advection/CMakeLists.txt b/operators/supg_advection/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..af2b32f7cbe94964056c3ae833647aeca9540f7c --- /dev/null +++ b/operators/supg_advection/CMakeLists.txt @@ -0,0 +1,52 @@ +add_library( opgen-supg_advection + + P2ElementwiseSupgAdvection.cpp + P2ElementwiseSupgAdvection.hpp + P2ElementwiseSupgAdvectionAnnulusMap.cpp + P2ElementwiseSupgAdvectionAnnulusMap.hpp +) + +if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY) + target_sources(opgen-supg_advection PRIVATE + + avx/P2ElementwiseSupgAdvectionAnnulusMap_apply_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp + avx/P2ElementwiseSupgAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp + avx/P2ElementwiseSupgAdvection_apply_P2ElementwiseSupgAdvection_macro_2D.cpp + avx/P2ElementwiseSupgAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvection_macro_2D.cpp + noarch/P2ElementwiseSupgAdvectionAnnulusMap_toMatrix_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseSupgAdvection_toMatrix_P2ElementwiseSupgAdvection_macro_2D.cpp + ) + + set_source_files_properties( + + avx/P2ElementwiseSupgAdvectionAnnulusMap_apply_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp + avx/P2ElementwiseSupgAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp + avx/P2ElementwiseSupgAdvection_apply_P2ElementwiseSupgAdvection_macro_2D.cpp + avx/P2ElementwiseSupgAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvection_macro_2D.cpp + + PROPERTIES COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS} + ) +else() + if(HYTEG_BUILD_WITH_AVX AND NOT WALBERLA_DOUBLE_ACCURACY) + message(WARNING "AVX vectorization only available in double precision. 
Using scalar kernels.") + endif() + + target_sources(opgen-supg_advection PRIVATE + + noarch/P2ElementwiseSupgAdvectionAnnulusMap_apply_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseSupgAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseSupgAdvectionAnnulusMap_toMatrix_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseSupgAdvection_apply_P2ElementwiseSupgAdvection_macro_2D.cpp + noarch/P2ElementwiseSupgAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvection_macro_2D.cpp + noarch/P2ElementwiseSupgAdvection_toMatrix_P2ElementwiseSupgAdvection_macro_2D.cpp + ) +endif() + +if (HYTEG_BUILD_WITH_PETSC) + target_link_libraries(opgen-supg_advection PUBLIC PETSc::PETSc) +endif () +if (WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT) + target_compile_features(opgen-supg_advection PUBLIC cxx_std_23) +else () + target_compile_features(opgen-supg_advection PUBLIC cxx_std_17) +endif () diff --git a/operators/supg_advection/P2ElementwiseSupgAdvection.cpp b/operators/supg_advection/P2ElementwiseSupgAdvection.cpp new file mode 100644 index 0000000000000000000000000000000000000000..da2963e6920df65784e9eeac831e7355ad8814b8 --- /dev/null +++ b/operators/supg_advection/P2ElementwiseSupgAdvection.cpp @@ -0,0 +1,397 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + +// Unfortunately, the inverse diagonal kernel wrapper triggers a GCC bug (maybe +// (related to) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107087) causing a +// warning in an internal standard library header (bits/stl_algobase.h). As a +// workaround, we disable the warning and include this header indirectly through +// a public header. +#include <waLBerlaDefinitions.h> +#ifdef WALBERLA_CXX_COMPILER_IS_GNU +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnonnull" +#endif +#include <cmath> +#ifdef WALBERLA_CXX_COMPILER_IS_GNU +#pragma GCC diagnostic pop +#endif + +#include "P2ElementwiseSupgAdvection.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +P2ElementwiseSupgAdvection::P2ElementwiseSupgAdvection( const std::shared_ptr< PrimitiveStorage >& storage, + size_t minLevel, + size_t maxLevel, + const P2Function< real_t >& _cp_times_delta, + const P2Function< real_t >& _ux, + const P2Function< real_t >& _uy ) +: Operator( storage, minLevel, maxLevel ) +, cp_times_delta( _cp_times_delta ) +, ux( _ux ) +, uy( _uy ) +{} + +void P2ElementwiseSupgAdvection::apply( const P2Function< real_t >& src, + const P2Function< real_t >& dst, + uint_t level, + DoFType flag, + UpdateType updateType ) const +{ + this->startTiming( "apply" ); + + // Make sure that halos are up-to-date + this->timingTree_->start( "pre-communication" ); + if ( this->storage_->hasGlobalCells() ) + { + WALBERLA_ABORT( "Not implemented." 
); + } + else + { + communication::syncFunctionBetweenPrimitives( src, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( cp_times_delta, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( ux, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( uy, level, communication::syncDirection_t::LOW2HIGH ); + } + this->timingTree_->stop( "pre-communication" ); + + if ( updateType == Replace ) + { + // We need to zero the destination array (including halos). + // However, we must not zero out anything that is not flagged with the specified BCs. + // Therefore, we first zero out everything that flagged, and then, later, + // the halos of the highest dim primitives. + dst.interpolate( walberla::numeric_cast< real_t >( 0 ), level, flag ); + } + + if ( storage_->hasGlobalCells() ) + { + WALBERLA_ABORT( "Not implemented." ); + } + else + { + for ( auto& it : storage_->getFaces() ) + { + Face& face = *it.second; + + // get hold of the actual numerical data in the functions + real_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_srcEdge = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_dstEdge = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_cp_times_deltaVertex = + face.getData( cp_times_delta.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_cp_times_deltaEdge = + face.getData( cp_times_delta.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uxVertex = face.getData( ux.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uxEdge = face.getData( ux.getEdgeDoFFunction().getFaceDataID() 
)->getPointer( level ); + real_t* _data_uyVertex = face.getData( uy.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uyEdge = face.getData( uy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + + // Zero out dst halos only + // + // This is also necessary when using update type == Add. + // During additive comm we then skip zeroing the data on the lower-dim primitives. + for ( const auto& idx : vertexdof::macroface::Iterator( level ) ) + { + if ( vertexdof::macroface::isVertexOnBoundary( level, idx ) ) + { + auto arrayIdx = vertexdof::macroface::index( level, idx.x(), idx.y() ); + _data_dstVertex[arrayIdx] = walberla::numeric_cast< real_t >( 0 ); + } + } + for ( const auto& idx : edgedof::macroface::Iterator( level ) ) + { + for ( const auto& orientation : edgedof::faceLocalEdgeDoFOrientations ) + { + if ( !edgedof::macroface::isInnerEdgeDoF( level, idx, orientation ) ) + { + auto arrayIdx = edgedof::macroface::index( level, idx.x(), idx.y(), orientation ); + _data_dstEdge[arrayIdx] = walberla::numeric_cast< real_t >( 0 ); + } + } + } + + const auto micro_edges_per_macro_edge = (int64_t) levelinfo::num_microedges_per_edge( level ); + const auto micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level ); + const real_t macro_vertex_coord_id_0comp0 = (real_t) face.getCoordinates()[0][0]; + const real_t macro_vertex_coord_id_0comp1 = (real_t) face.getCoordinates()[0][1]; + const real_t macro_vertex_coord_id_1comp0 = (real_t) face.getCoordinates()[1][0]; + const real_t macro_vertex_coord_id_1comp1 = (real_t) face.getCoordinates()[1][1]; + const real_t macro_vertex_coord_id_2comp0 = (real_t) face.getCoordinates()[2][0]; + const real_t macro_vertex_coord_id_2comp1 = (real_t) face.getCoordinates()[2][1]; + + this->timingTree_->start( "kernel" ); + + apply_P2ElementwiseSupgAdvection_macro_2D( + + _data_cp_times_deltaEdge, + _data_cp_times_deltaVertex, + _data_dstEdge, + _data_dstVertex, + _data_srcEdge, 
+ _data_srcVertex, + _data_uxEdge, + _data_uxVertex, + _data_uyEdge, + _data_uyVertex, + macro_vertex_coord_id_0comp0, + macro_vertex_coord_id_0comp1, + macro_vertex_coord_id_1comp0, + macro_vertex_coord_id_1comp1, + macro_vertex_coord_id_2comp0, + macro_vertex_coord_id_2comp1, + micro_edges_per_macro_edge, + micro_edges_per_macro_edge_float ); + + this->timingTree_->stop( "kernel" ); + } + + // Push result to lower-dimensional primitives + // + this->timingTree_->start( "post-communication" ); + // Note: We could avoid communication here by implementing the apply() also for the respective + // lower dimensional primitives! + dst.getVertexDoFFunction().communicateAdditively< Face, Edge >( + level, DoFType::All ^ flag, *storage_, updateType == Replace ); + dst.getVertexDoFFunction().communicateAdditively< Face, Vertex >( + level, DoFType::All ^ flag, *storage_, updateType == Replace ); + dst.getEdgeDoFFunction().communicateAdditively< Face, Edge >( + level, DoFType::All ^ flag, *storage_, updateType == Replace ); + this->timingTree_->stop( "post-communication" ); + } + + this->stopTiming( "apply" ); +} +void P2ElementwiseSupgAdvection::toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat, + const P2Function< idx_t >& src, + const P2Function< idx_t >& dst, + uint_t level, + DoFType flag ) const +{ + this->startTiming( "toMatrix" ); + + // We currently ignore the flag provided! 
+ if ( flag != All ) + { + WALBERLA_LOG_WARNING_ON_ROOT( "Input flag ignored in toMatrix; using flag = All" ); + } + + if ( storage_->hasGlobalCells() ) + { + this->timingTree_->start( "pre-communication" ); + cp_times_delta.communicate< Face, Cell >( level ); + cp_times_delta.communicate< Edge, Cell >( level ); + cp_times_delta.communicate< Vertex, Cell >( level ); + ux.communicate< Face, Cell >( level ); + ux.communicate< Edge, Cell >( level ); + ux.communicate< Vertex, Cell >( level ); + uy.communicate< Face, Cell >( level ); + uy.communicate< Edge, Cell >( level ); + uy.communicate< Vertex, Cell >( level ); + this->timingTree_->stop( "pre-communication" ); + + WALBERLA_ABORT( "Not implemented." ); + } + else + { + this->timingTree_->start( "pre-communication" ); + communication::syncFunctionBetweenPrimitives( cp_times_delta, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( ux, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( uy, level, communication::syncDirection_t::LOW2HIGH ); + this->timingTree_->stop( "pre-communication" ); + + for ( auto& it : storage_->getFaces() ) + { + Face& face = *it.second; + + // get hold of the actual numerical data + idx_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + idx_t* _data_srcEdge = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + idx_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + idx_t* _data_dstEdge = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_cp_times_deltaVertex = + face.getData( cp_times_delta.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_cp_times_deltaEdge = + face.getData( cp_times_delta.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uxVertex = face.getData( 
ux.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uxEdge = face.getData( ux.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uyVertex = face.getData( uy.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uyEdge = face.getData( uy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + + const auto micro_edges_per_macro_edge = (int64_t) levelinfo::num_microedges_per_edge( level ); + const auto micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level ); + const real_t macro_vertex_coord_id_0comp0 = (real_t) face.getCoordinates()[0][0]; + const real_t macro_vertex_coord_id_0comp1 = (real_t) face.getCoordinates()[0][1]; + const real_t macro_vertex_coord_id_1comp0 = (real_t) face.getCoordinates()[1][0]; + const real_t macro_vertex_coord_id_1comp1 = (real_t) face.getCoordinates()[1][1]; + const real_t macro_vertex_coord_id_2comp0 = (real_t) face.getCoordinates()[2][0]; + const real_t macro_vertex_coord_id_2comp1 = (real_t) face.getCoordinates()[2][1]; + + this->timingTree_->start( "kernel" ); + + toMatrix_P2ElementwiseSupgAdvection_macro_2D( + + _data_cp_times_deltaEdge, + _data_cp_times_deltaVertex, + _data_dstEdge, + _data_dstVertex, + _data_srcEdge, + _data_srcVertex, + _data_uxEdge, + _data_uxVertex, + _data_uyEdge, + _data_uyVertex, + macro_vertex_coord_id_0comp0, + macro_vertex_coord_id_0comp1, + macro_vertex_coord_id_1comp0, + macro_vertex_coord_id_1comp1, + macro_vertex_coord_id_2comp0, + macro_vertex_coord_id_2comp1, + mat, + micro_edges_per_macro_edge, + micro_edges_per_macro_edge_float ); + + this->timingTree_->stop( "kernel" ); + } + } + this->stopTiming( "toMatrix" ); +} +void P2ElementwiseSupgAdvection::computeInverseDiagonalOperatorValues() +{ + this->startTiming( "computeInverseDiagonalOperatorValues" ); + + if ( invDiag_ == nullptr ) + { + invDiag_ = std::make_shared< P2Function< real_t > >( "inverse diagonal entries", storage_, 
minLevel_, maxLevel_ ); + } + + for ( uint_t level = minLevel_; level <= maxLevel_; level++ ) + { + invDiag_->setToZero( level ); + + if ( storage_->hasGlobalCells() ) + { + this->timingTree_->start( "pre-communication" ); + cp_times_delta.communicate< Face, Cell >( level ); + cp_times_delta.communicate< Edge, Cell >( level ); + cp_times_delta.communicate< Vertex, Cell >( level ); + ux.communicate< Face, Cell >( level ); + ux.communicate< Edge, Cell >( level ); + ux.communicate< Vertex, Cell >( level ); + uy.communicate< Face, Cell >( level ); + uy.communicate< Edge, Cell >( level ); + uy.communicate< Vertex, Cell >( level ); + this->timingTree_->stop( "pre-communication" ); + + WALBERLA_ABORT( "Not implemented." ); + ( *invDiag_ ).invertElementwise( level ); + } + else + { + this->timingTree_->start( "pre-communication" ); + communication::syncFunctionBetweenPrimitives( cp_times_delta, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( ux, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( uy, level, communication::syncDirection_t::LOW2HIGH ); + this->timingTree_->stop( "pre-communication" ); + + for ( auto& it : storage_->getFaces() ) + { + Face& face = *it.second; + + // get hold of the actual numerical data + real_t* _data_invDiag_Vertex = + face.getData( ( *invDiag_ ).getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_invDiag_Edge = face.getData( ( *invDiag_ ).getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_cp_times_deltaVertex = + face.getData( cp_times_delta.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_cp_times_deltaEdge = + face.getData( cp_times_delta.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uxVertex = face.getData( ux.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uxEdge = face.getData( 
ux.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uyVertex = face.getData( uy.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uyEdge = face.getData( uy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + + const auto micro_edges_per_macro_edge = (int64_t) levelinfo::num_microedges_per_edge( level ); + const auto micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level ); + const real_t macro_vertex_coord_id_0comp0 = (real_t) face.getCoordinates()[0][0]; + const real_t macro_vertex_coord_id_0comp1 = (real_t) face.getCoordinates()[0][1]; + const real_t macro_vertex_coord_id_1comp0 = (real_t) face.getCoordinates()[1][0]; + const real_t macro_vertex_coord_id_1comp1 = (real_t) face.getCoordinates()[1][1]; + const real_t macro_vertex_coord_id_2comp0 = (real_t) face.getCoordinates()[2][0]; + const real_t macro_vertex_coord_id_2comp1 = (real_t) face.getCoordinates()[2][1]; + + this->timingTree_->start( "kernel" ); + + computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvection_macro_2D( + + _data_cp_times_deltaEdge, + _data_cp_times_deltaVertex, + _data_invDiag_Edge, + _data_invDiag_Vertex, + _data_uxEdge, + _data_uxVertex, + _data_uyEdge, + _data_uyVertex, + macro_vertex_coord_id_0comp0, + macro_vertex_coord_id_0comp1, + macro_vertex_coord_id_1comp0, + macro_vertex_coord_id_1comp1, + macro_vertex_coord_id_2comp0, + macro_vertex_coord_id_2comp1, + micro_edges_per_macro_edge, + micro_edges_per_macro_edge_float ); + + this->timingTree_->stop( "kernel" ); + } + + // Push result to lower-dimensional primitives + // + this->timingTree_->start( "post-communication" ); + // Note: We could avoid communication here by implementing the apply() also for the respective + // lower dimensional primitives! 
+ ( *invDiag_ ).getVertexDoFFunction().communicateAdditively< Face, Edge >( level ); + ( *invDiag_ ).getVertexDoFFunction().communicateAdditively< Face, Vertex >( level ); + ( *invDiag_ ).getEdgeDoFFunction().communicateAdditively< Face, Edge >( level ); + this->timingTree_->stop( "post-communication" ); + ( *invDiag_ ).invertElementwise( level ); + } + } + + this->stopTiming( "computeInverseDiagonalOperatorValues" ); +} +std::shared_ptr< P2Function< real_t > > P2ElementwiseSupgAdvection::getInverseDiagonalValues() const +{ + return invDiag_; +} + +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/supg_advection/P2ElementwiseSupgAdvection.hpp b/operators/supg_advection/P2ElementwiseSupgAdvection.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2a9d3838f1a614003a2bed318c4c0216d9239e1e --- /dev/null +++ b/operators/supg_advection/P2ElementwiseSupgAdvection.hpp @@ -0,0 +1,183 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. 
+*/ + +#pragma once + +#include "core/DataTypes.h" + +#include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" +#include "hyteg/communication/Syncing.hpp" +#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" +#include "hyteg/operators/Operator.hpp" +#include "hyteg/p2functionspace/P2Function.hpp" +#include "hyteg/primitivestorage/PrimitiveStorage.hpp" +#include "hyteg/solvers/Smoothables.hpp" +#include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +/// advection operator which needs to be used in combination with SUPG +/// +/// Geometry map: IdentityMap +/// +/// Weak formulation +/// +/// T: trial function (scalar space: Lagrange, degree: 2) +/// s: test function (scalar space: Lagrange, degree: 2) +/// u: velocity function (vectorial space: Lagrange, degree: 2) +/// +/// ∫ cp ( u · ∇T ) 𝛿(u · ∇s) + +class P2ElementwiseSupgAdvection : public Operator< P2Function< real_t >, P2Function< real_t > >, + public OperatorWithInverseDiagonal< P2Function< real_t > > +{ + public: + P2ElementwiseSupgAdvection( const std::shared_ptr< PrimitiveStorage >& storage, + size_t minLevel, + size_t maxLevel, + const P2Function< real_t >& _cp_times_delta, + const P2Function< real_t >& _ux, + const P2Function< real_t >& _uy ); + + void apply( const P2Function< real_t >& src, + const P2Function< real_t >& dst, + uint_t level, + DoFType flag, + UpdateType updateType = Replace ) const; + + void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat, + const P2Function< idx_t >& src, + const P2Function< idx_t >& dst, + uint_t level, + DoFType flag ) const; + + void computeInverseDiagonalOperatorValues(); + + std::shared_ptr< P2Function< real_t > > getInverseDiagonalValues() const; + + protected: + private: + /// Integral: P2ElementwiseSupgAdvection + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - 
loop strategy: SAWTOOTH + /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: IdentityMap + /// - operations per element: + /// adds muls divs pows abs assignments function_calls unknown_ops + /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- + /// 404 432 12 0 0 0 0 1 + void apply_P2ElementwiseSupgAdvection_macro_2D( real_t* RESTRICT _data_cp_times_deltaEdge, + real_t* RESTRICT _data_cp_times_deltaVertex, + real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseSupgAdvection + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH + /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: IdentityMap + /// - operations per element: + /// adds muls divs pows abs assignments function_calls unknown_ops + /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- + /// 368 396 12 0 0 0 0 4 + void toMatrix_P2ElementwiseSupgAdvection_macro_2D( real_t* RESTRICT _data_cp_times_deltaEdge, + real_t* RESTRICT _data_cp_times_deltaVertex, + idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t 
macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseSupgAdvection + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH + /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: IdentityMap + /// - operations per element: + /// adds muls divs pows abs assignments function_calls unknown_ops + /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- + /// 254 316 12 0 0 0 0 1 + void computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvection_macro_2D( real_t* RESTRICT _data_cp_times_deltaEdge, + real_t* RESTRICT _data_cp_times_deltaVertex, + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + std::shared_ptr< P2Function< real_t > > invDiag_; + P2Function< real_t > cp_times_delta; + P2Function< real_t > ux; + P2Function< real_t > uy; +}; + +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/supg_advection/P2ElementwiseSupgAdvectionAnnulusMap.cpp b/operators/supg_advection/P2ElementwiseSupgAdvectionAnnulusMap.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..486b863cd2fbf17de6a5d00f214a5c268ecb5bed --- /dev/null +++ b/operators/supg_advection/P2ElementwiseSupgAdvectionAnnulusMap.cpp @@ -0,0 +1,454 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + +// Unfortunately, the inverse diagonal kernel wrapper triggers a GCC bug (maybe +// (related to) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107087) causing a +// warning in an internal standard library header (bits/stl_algobase.h). As a +// workaround, we disable the warning and include this header indirectly through +// a public header. 
+#include <waLBerlaDefinitions.h> +#ifdef WALBERLA_CXX_COMPILER_IS_GNU +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnonnull" +#endif +#include <cmath> +#ifdef WALBERLA_CXX_COMPILER_IS_GNU +#pragma GCC diagnostic pop +#endif + +#include "P2ElementwiseSupgAdvectionAnnulusMap.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +P2ElementwiseSupgAdvectionAnnulusMap::P2ElementwiseSupgAdvectionAnnulusMap( const std::shared_ptr< PrimitiveStorage >& storage, + size_t minLevel, + size_t maxLevel, + const P2Function< real_t >& _cp_times_delta, + const P2Function< real_t >& _ux, + const P2Function< real_t >& _uy ) +: Operator( storage, minLevel, maxLevel ) +, cp_times_delta( _cp_times_delta ) +, ux( _ux ) +, uy( _uy ) +{} + +void P2ElementwiseSupgAdvectionAnnulusMap::apply( const P2Function< real_t >& src, + const P2Function< real_t >& dst, + uint_t level, + DoFType flag, + UpdateType updateType ) const +{ + this->startTiming( "apply" ); + + // Make sure that halos are up-to-date + this->timingTree_->start( "pre-communication" ); + if ( this->storage_->hasGlobalCells() ) + { + WALBERLA_ABORT( "Not implemented." ); + } + else + { + communication::syncFunctionBetweenPrimitives( src, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( cp_times_delta, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( ux, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( uy, level, communication::syncDirection_t::LOW2HIGH ); + } + this->timingTree_->stop( "pre-communication" ); + + if ( updateType == Replace ) + { + // We need to zero the destination array (including halos). + // However, we must not zero out anything that is not flagged with the specified BCs. + // Therefore, we first zero out everything that flagged, and then, later, + // the halos of the highest dim primitives. 
+ dst.interpolate( walberla::numeric_cast< real_t >( 0 ), level, flag ); + } + + if ( storage_->hasGlobalCells() ) + { + WALBERLA_ABORT( "Not implemented." ); + } + else + { + for ( auto& it : storage_->getFaces() ) + { + Face& face = *it.second; + + // get hold of the actual numerical data in the functions + real_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_srcEdge = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_dstEdge = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_cp_times_deltaVertex = + face.getData( cp_times_delta.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_cp_times_deltaEdge = + face.getData( cp_times_delta.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uxVertex = face.getData( ux.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uxEdge = face.getData( ux.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uyVertex = face.getData( uy.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uyEdge = face.getData( uy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + + // Zero out dst halos only + // + // This is also necessary when using update type == Add. + // During additive comm we then skip zeroing the data on the lower-dim primitives. 
+ for ( const auto& idx : vertexdof::macroface::Iterator( level ) ) + { + if ( vertexdof::macroface::isVertexOnBoundary( level, idx ) ) + { + auto arrayIdx = vertexdof::macroface::index( level, idx.x(), idx.y() ); + _data_dstVertex[arrayIdx] = walberla::numeric_cast< real_t >( 0 ); + } + } + for ( const auto& idx : edgedof::macroface::Iterator( level ) ) + { + for ( const auto& orientation : edgedof::faceLocalEdgeDoFOrientations ) + { + if ( !edgedof::macroface::isInnerEdgeDoF( level, idx, orientation ) ) + { + auto arrayIdx = edgedof::macroface::index( level, idx.x(), idx.y(), orientation ); + _data_dstEdge[arrayIdx] = walberla::numeric_cast< real_t >( 0 ); + } + } + } + + const auto micro_edges_per_macro_edge = (int64_t) levelinfo::num_microedges_per_edge( level ); + const auto micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level ); + const real_t macro_vertex_coord_id_0comp0 = (real_t) face.getCoordinates()[0][0]; + const real_t macro_vertex_coord_id_0comp1 = (real_t) face.getCoordinates()[0][1]; + const real_t macro_vertex_coord_id_1comp0 = (real_t) face.getCoordinates()[1][0]; + const real_t macro_vertex_coord_id_1comp1 = (real_t) face.getCoordinates()[1][1]; + const real_t macro_vertex_coord_id_2comp0 = (real_t) face.getCoordinates()[2][0]; + const real_t macro_vertex_coord_id_2comp1 = (real_t) face.getCoordinates()[2][1]; + WALBERLA_CHECK_NOT_NULLPTR( + std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() ), + "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." 
) + real_t radRefVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRefVertex(); + real_t radRayVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRayVertex(); + real_t refVertex_0 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[0]; + real_t rayVertex_0 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[0]; + real_t thrVertex_0 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[0]; + real_t refVertex_1 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[1]; + real_t rayVertex_1 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[1]; + real_t thrVertex_1 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[1]; + + this->timingTree_->start( "kernel" ); + + apply_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D( + + _data_cp_times_deltaEdge, + _data_cp_times_deltaVertex, + _data_dstEdge, + _data_dstVertex, + _data_srcEdge, + _data_srcVertex, + _data_uxEdge, + _data_uxVertex, + _data_uyEdge, + _data_uyVertex, + macro_vertex_coord_id_0comp0, + macro_vertex_coord_id_0comp1, + macro_vertex_coord_id_1comp0, + macro_vertex_coord_id_1comp1, + macro_vertex_coord_id_2comp0, + macro_vertex_coord_id_2comp1, + micro_edges_per_macro_edge, + micro_edges_per_macro_edge_float, + radRayVertex, + radRefVertex, + rayVertex_0, + rayVertex_1, + refVertex_0, + refVertex_1, + thrVertex_0, + thrVertex_1 ); + + this->timingTree_->stop( "kernel" ); + } + + // Push result to lower-dimensional primitives + // + this->timingTree_->start( "post-communication" ); + // Note: We could avoid communication here by implementing the apply() also for the respective + // lower dimensional primitives! 
+ dst.getVertexDoFFunction().communicateAdditively< Face, Edge >( + level, DoFType::All ^ flag, *storage_, updateType == Replace ); + dst.getVertexDoFFunction().communicateAdditively< Face, Vertex >( + level, DoFType::All ^ flag, *storage_, updateType == Replace ); + dst.getEdgeDoFFunction().communicateAdditively< Face, Edge >( + level, DoFType::All ^ flag, *storage_, updateType == Replace ); + this->timingTree_->stop( "post-communication" ); + } + + this->stopTiming( "apply" ); +} +void P2ElementwiseSupgAdvectionAnnulusMap::toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat, + const P2Function< idx_t >& src, + const P2Function< idx_t >& dst, + uint_t level, + DoFType flag ) const +{ + this->startTiming( "toMatrix" ); + + // We currently ignore the flag provided! + if ( flag != All ) + { + WALBERLA_LOG_WARNING_ON_ROOT( "Input flag ignored in toMatrix; using flag = All" ); + } + + if ( storage_->hasGlobalCells() ) + { + this->timingTree_->start( "pre-communication" ); + cp_times_delta.communicate< Face, Cell >( level ); + cp_times_delta.communicate< Edge, Cell >( level ); + cp_times_delta.communicate< Vertex, Cell >( level ); + ux.communicate< Face, Cell >( level ); + ux.communicate< Edge, Cell >( level ); + ux.communicate< Vertex, Cell >( level ); + uy.communicate< Face, Cell >( level ); + uy.communicate< Edge, Cell >( level ); + uy.communicate< Vertex, Cell >( level ); + this->timingTree_->stop( "pre-communication" ); + + WALBERLA_ABORT( "Not implemented." 
); + } + else + { + this->timingTree_->start( "pre-communication" ); + communication::syncFunctionBetweenPrimitives( cp_times_delta, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( ux, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( uy, level, communication::syncDirection_t::LOW2HIGH ); + this->timingTree_->stop( "pre-communication" ); + + for ( auto& it : storage_->getFaces() ) + { + Face& face = *it.second; + + // get hold of the actual numerical data + idx_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + idx_t* _data_srcEdge = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + idx_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + idx_t* _data_dstEdge = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_cp_times_deltaVertex = + face.getData( cp_times_delta.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_cp_times_deltaEdge = + face.getData( cp_times_delta.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uxVertex = face.getData( ux.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uxEdge = face.getData( ux.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uyVertex = face.getData( uy.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uyEdge = face.getData( uy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + + const auto micro_edges_per_macro_edge = (int64_t) levelinfo::num_microedges_per_edge( level ); + const auto micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level ); + const real_t macro_vertex_coord_id_0comp0 = (real_t) face.getCoordinates()[0][0]; + const real_t macro_vertex_coord_id_0comp1 = 
(real_t) face.getCoordinates()[0][1]; + const real_t macro_vertex_coord_id_1comp0 = (real_t) face.getCoordinates()[1][0]; + const real_t macro_vertex_coord_id_1comp1 = (real_t) face.getCoordinates()[1][1]; + const real_t macro_vertex_coord_id_2comp0 = (real_t) face.getCoordinates()[2][0]; + const real_t macro_vertex_coord_id_2comp1 = (real_t) face.getCoordinates()[2][1]; + WALBERLA_CHECK_NOT_NULLPTR( + std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() ), + "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." ) + real_t radRefVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRefVertex(); + real_t radRayVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRayVertex(); + real_t refVertex_0 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[0]; + real_t rayVertex_0 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[0]; + real_t thrVertex_0 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[0]; + real_t refVertex_1 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[1]; + real_t rayVertex_1 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[1]; + real_t thrVertex_1 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[1]; + + this->timingTree_->start( "kernel" ); + + toMatrix_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D( + + _data_cp_times_deltaEdge, + _data_cp_times_deltaVertex, + _data_dstEdge, + _data_dstVertex, + _data_srcEdge, + _data_srcVertex, + _data_uxEdge, + _data_uxVertex, + _data_uyEdge, + _data_uyVertex, + macro_vertex_coord_id_0comp0, + macro_vertex_coord_id_0comp1, + macro_vertex_coord_id_1comp0, + macro_vertex_coord_id_1comp1, + macro_vertex_coord_id_2comp0, + macro_vertex_coord_id_2comp1, + mat, + micro_edges_per_macro_edge, + micro_edges_per_macro_edge_float, + radRayVertex, + 
radRefVertex, + rayVertex_0, + rayVertex_1, + refVertex_0, + refVertex_1, + thrVertex_0, + thrVertex_1 ); + + this->timingTree_->stop( "kernel" ); + } + } + this->stopTiming( "toMatrix" ); +} +void P2ElementwiseSupgAdvectionAnnulusMap::computeInverseDiagonalOperatorValues() +{ + this->startTiming( "computeInverseDiagonalOperatorValues" ); + + if ( invDiag_ == nullptr ) + { + invDiag_ = std::make_shared< P2Function< real_t > >( "inverse diagonal entries", storage_, minLevel_, maxLevel_ ); + } + + for ( uint_t level = minLevel_; level <= maxLevel_; level++ ) + { + invDiag_->setToZero( level ); + + if ( storage_->hasGlobalCells() ) + { + this->timingTree_->start( "pre-communication" ); + cp_times_delta.communicate< Face, Cell >( level ); + cp_times_delta.communicate< Edge, Cell >( level ); + cp_times_delta.communicate< Vertex, Cell >( level ); + ux.communicate< Face, Cell >( level ); + ux.communicate< Edge, Cell >( level ); + ux.communicate< Vertex, Cell >( level ); + uy.communicate< Face, Cell >( level ); + uy.communicate< Edge, Cell >( level ); + uy.communicate< Vertex, Cell >( level ); + this->timingTree_->stop( "pre-communication" ); + + WALBERLA_ABORT( "Not implemented." 
); + ( *invDiag_ ).invertElementwise( level ); + } + else + { + this->timingTree_->start( "pre-communication" ); + communication::syncFunctionBetweenPrimitives( cp_times_delta, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( ux, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( uy, level, communication::syncDirection_t::LOW2HIGH ); + this->timingTree_->stop( "pre-communication" ); + + for ( auto& it : storage_->getFaces() ) + { + Face& face = *it.second; + + // get hold of the actual numerical data + real_t* _data_invDiag_Vertex = + face.getData( ( *invDiag_ ).getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_invDiag_Edge = face.getData( ( *invDiag_ ).getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_cp_times_deltaVertex = + face.getData( cp_times_delta.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_cp_times_deltaEdge = + face.getData( cp_times_delta.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uxVertex = face.getData( ux.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uxEdge = face.getData( ux.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uyVertex = face.getData( uy.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_uyEdge = face.getData( uy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + + const auto micro_edges_per_macro_edge = (int64_t) levelinfo::num_microedges_per_edge( level ); + const auto micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level ); + const real_t macro_vertex_coord_id_0comp0 = (real_t) face.getCoordinates()[0][0]; + const real_t macro_vertex_coord_id_0comp1 = (real_t) face.getCoordinates()[0][1]; + const real_t macro_vertex_coord_id_1comp0 = (real_t) face.getCoordinates()[1][0]; + const real_t 
macro_vertex_coord_id_1comp1 = (real_t) face.getCoordinates()[1][1]; + const real_t macro_vertex_coord_id_2comp0 = (real_t) face.getCoordinates()[2][0]; + const real_t macro_vertex_coord_id_2comp1 = (real_t) face.getCoordinates()[2][1]; + WALBERLA_CHECK_NOT_NULLPTR( + std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() ), + "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." ) + real_t radRefVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRefVertex(); + real_t radRayVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRayVertex(); + real_t refVertex_0 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[0]; + real_t rayVertex_0 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[0]; + real_t thrVertex_0 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[0]; + real_t refVertex_1 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[1]; + real_t rayVertex_1 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[1]; + real_t thrVertex_1 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[1]; + + this->timingTree_->start( "kernel" ); + + computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D( + + _data_cp_times_deltaEdge, + _data_cp_times_deltaVertex, + _data_invDiag_Edge, + _data_invDiag_Vertex, + _data_uxEdge, + _data_uxVertex, + _data_uyEdge, + _data_uyVertex, + macro_vertex_coord_id_0comp0, + macro_vertex_coord_id_0comp1, + macro_vertex_coord_id_1comp0, + macro_vertex_coord_id_1comp1, + macro_vertex_coord_id_2comp0, + macro_vertex_coord_id_2comp1, + micro_edges_per_macro_edge, + micro_edges_per_macro_edge_float, + radRayVertex, + radRefVertex, + rayVertex_0, + rayVertex_1, + refVertex_0, + refVertex_1, + thrVertex_0, + thrVertex_1 ); + + this->timingTree_->stop( "kernel" 
); + } + + // Push result to lower-dimensional primitives + // + this->timingTree_->start( "post-communication" ); + // Note: We could avoid communication here by implementing the apply() also for the respective + // lower dimensional primitives! + ( *invDiag_ ).getVertexDoFFunction().communicateAdditively< Face, Edge >( level ); + ( *invDiag_ ).getVertexDoFFunction().communicateAdditively< Face, Vertex >( level ); + ( *invDiag_ ).getEdgeDoFFunction().communicateAdditively< Face, Edge >( level ); + this->timingTree_->stop( "post-communication" ); + ( *invDiag_ ).invertElementwise( level ); + } + } + + this->stopTiming( "computeInverseDiagonalOperatorValues" ); +} +/** Returns the invDiag_ shared pointer. The function it refers to is the one that computeInverseDiagonalOperatorValues() (directly above) accumulates additively onto edges and vertices and then inverts element-wise. */ std::shared_ptr< P2Function< real_t > > P2ElementwiseSupgAdvectionAnnulusMap::getInverseDiagonalValues() const +{ + return invDiag_; +} + +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/supg_advection/P2ElementwiseSupgAdvectionAnnulusMap.hpp b/operators/supg_advection/P2ElementwiseSupgAdvectionAnnulusMap.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0f012333e253ef9799d976b344b3dcc8775f4737 --- /dev/null +++ b/operators/supg_advection/P2ElementwiseSupgAdvectionAnnulusMap.hpp @@ -0,0 +1,209 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program.
If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + +#pragma once + +#include "core/DataTypes.h" + +#include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" +#include "hyteg/communication/Syncing.hpp" +#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" +#include "hyteg/geometry/AnnulusMap.hpp" +#include "hyteg/operators/Operator.hpp" +#include "hyteg/p2functionspace/P2Function.hpp" +#include "hyteg/primitivestorage/PrimitiveStorage.hpp" +#include "hyteg/solvers/Smoothables.hpp" +#include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +/// advection operator which needs to be used in combination with SUPG +/// +/// Geometry map: AnnulusMap +/// +/// Weak formulation +/// +/// T: trial function (scalar space: Lagrange, degree: 2) +/// s: test function (scalar space: Lagrange, degree: 2) +/// u: velocity function (vectorial space: Lagrange, degree: 2) +/// +/// ∫ cp ( u · ∇T ) 𝛿(u · ∇s) + +class P2ElementwiseSupgAdvectionAnnulusMap : public Operator< P2Function< real_t >, P2Function< real_t > >, + public OperatorWithInverseDiagonal< P2Function< real_t > > +{ + public: + /** Takes the primitive storage, minLevel/maxLevel and three P2 functions. NOTE(review): the definition is outside this chunk; presumably _cp_times_delta, _ux and _uy initialise the by-value members cp_times_delta, ux and uy - confirm. */ P2ElementwiseSupgAdvectionAnnulusMap( const std::shared_ptr< PrimitiveStorage >& storage, + size_t minLevel, + size_t maxLevel, + const P2Function< real_t >& _cp_times_delta, + const P2Function< real_t >& _ux, + const P2Function< real_t >& _uy ); + + /** Const member; updateType defaults to Replace. NOTE(review): the definition is outside this chunk; presumably it applies the operator to src on the given level and writes the result to dst according to flag and updateType - confirm. */ void apply( const P2Function< real_t >& src, + const P2Function< real_t >& dst, + uint_t level, + DoFType flag, + UpdateType updateType = Replace ) const; + + /** Const member; src and dst are idx_t-valued P2 functions. NOTE(review): the definition is outside this chunk; presumably src/dst carry the DoF numbering used to enter the operator into mat - confirm. */ void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat, + const P2Function< idx_t >& src, + const P2Function< idx_t >& dst, + uint_t level, + DoFType flag ) const; + + /** Fills invDiag_. The face-based branch of the definition runs the computeInverseDiagonalOperatorValues kernel on every face of the storage, adds the face results onto edges and vertices via communicateAdditively and finally calls invertElementwise on invDiag_. */ void
computeInverseDiagonalOperatorValues(); + + /** Returns the invDiag_ shared pointer. */ std::shared_ptr< P2Function< real_t > > getInverseDiagonalValues() const; + + protected: + private: + /// Integral: P2ElementwiseSupgAdvectionAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH + /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap + /// - operations per element: + /// adds muls divs pows abs assignments function_calls unknown_ops + /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- + /// 640 852 20 12 0 0 0 1 + void apply_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D( real_t* RESTRICT _data_cp_times_deltaEdge, + real_t* RESTRICT _data_cp_times_deltaVertex, + real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseSupgAdvectionAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH + /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap + /// - operations per element: + /// adds muls divs pows abs assignments function_calls unknown_ops + /// ------ ------ ------ ------ ----- -------------
---------------- ------------- + /// 604 816 20 12 0 0 0 4 + void toMatrix_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D( real_t* RESTRICT _data_cp_times_deltaEdge, + real_t* RESTRICT _data_cp_times_deltaVertex, + idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /// Integral: P2ElementwiseSupgAdvectionAnnulusMap + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH + /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: AnnulusMap + /// - operations per element: + /// adds muls divs pows abs assignments function_calls unknown_ops + /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- + /// 490 736 20 12 0 0 0 1 + void computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D( + real_t* RESTRICT _data_cp_times_deltaEdge, + real_t* RESTRICT _data_cp_times_deltaVertex, + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_uxEdge, + real_t* RESTRICT _data_uxVertex, + real_t* RESTRICT _data_uyEdge, + real_t* RESTRICT _data_uyVertex, + real_t macro_vertex_coord_id_0comp0, + real_t
macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float, + real_t radRayVertex, + real_t radRefVertex, + real_t rayVertex_0, + real_t rayVertex_1, + real_t refVertex_0, + real_t refVertex_1, + real_t thrVertex_0, + real_t thrVertex_1 ) const; + + /** Written by computeInverseDiagonalOperatorValues() and handed out by getInverseDiagonalValues(). */ std::shared_ptr< P2Function< real_t > > invDiag_; + /** Held by value. Its vertex and edge DoF face data are passed to the inverse-diagonal kernel as _data_cp_times_deltaVertex and _data_cp_times_deltaEdge. NOTE(review): presumably the product of cp and the SUPG delta from the weak form above - confirm. */ P2Function< real_t > cp_times_delta; + /** Held by value. ux, and uy below, supply the kernel arguments _data_uxVertex/_data_uxEdge and _data_uyVertex/_data_uyEdge. NOTE(review): presumably the two components of the velocity u - confirm. */ P2Function< real_t > ux; + P2Function< real_t > uy; +}; + +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/supg_advection/avx/P2ElementwiseSupgAdvectionAnnulusMap_apply_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp b/operators/supg_advection/avx/P2ElementwiseSupgAdvectionAnnulusMap_apply_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..12f74c85da1765f5e0ee8a94c21da89320ae9651 --- /dev/null +++ b/operators/supg_advection/avx/P2ElementwiseSupgAdvectionAnnulusMap_apply_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp @@ -0,0 +1,1245 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseSupgAdvectionAnnulusMap.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseSupgAdvectionAnnulusMap::apply_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D( real_t * RESTRICT _data_cp_times_deltaEdge, real_t * RESTRICT _data_cp_times_deltaVertex, real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + 
macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; + const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_8 = -tmp_qloop_7; + const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); + const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; + const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_11*1.0; + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 
1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d cp_times_delta_dof_0 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d cp_times_delta_dof_1 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d cp_times_delta_dof_2 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d cp_times_delta_dof_3 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d cp_times_delta_dof_4 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d cp_times_delta_dof_5 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d 
q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0); + const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0); + const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1); 
+ const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3); + const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4); + const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5)); + const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5)); + const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)); + const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18); + const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24); + const 
__m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3); + const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30); + const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3); + const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3)); + const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_38 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_39 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_37),tmp_qloop_38); + const __m256d tmp_qloop_40 = 
_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY))); + const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_43 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_44); + const __m256d tmp_qloop_46 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_47); + const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38); + const __m256d tmp_qloop_50 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_37); + const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_42),tmp_qloop_44),tmp_qloop_47); + const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,ux_dof_3),_mm256_mul_pd(tmp_qloop_45,ux_dof_1)),_mm256_mul_pd(tmp_qloop_48,ux_dof_2)),_mm256_mul_pd(tmp_qloop_49,ux_dof_4)),_mm256_mul_pd(tmp_qloop_50,ux_dof_5)),_mm256_mul_pd(tmp_qloop_51,ux_dof_0)); + const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,uy_dof_3),_mm256_mul_pd(tmp_qloop_45,uy_dof_1)),_mm256_mul_pd(tmp_qloop_48,uy_dof_2)),_mm256_mul_pd(tmp_qloop_49,uy_dof_4)),_mm256_mul_pd(tmp_qloop_50,uy_dof_5)),_mm256_mul_pd(tmp_qloop_51,uy_dof_0)); + const __m256d tmp_qloop_56 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_37); + const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)); + const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)); + const __m256d tmp_qloop_62 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_38); + const __m256d tmp_qloop_63 = _mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)); + const __m256d tmp_qloop_64 = 
_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)); + const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)); + const __m256d tmp_qloop_68 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)); + const __m256d tmp_qloop_69 = _mm256_add_pd(tmp_qloop_67,tmp_qloop_68); + const __m256d tmp_qloop_70 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)); + const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)); + const __m256d tmp_qloop_72 = _mm256_add_pd(tmp_qloop_70,tmp_qloop_71); + const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_76 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_68,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_75,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY))); + const __m256d tmp_qloop_77 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_75,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))); + const __m256d tmp_qloop_80 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_81 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_80,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY))); + const __m256d tmp_qloop_82 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_80,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))); + const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4)); + const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21); + const __m256d abs_det_jac_blending = tmp_qloop_21; + const __m256d tmp_qloop_55 = 
_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_times_delta_dof_0,tmp_qloop_51),_mm256_mul_pd(cp_times_delta_dof_1,tmp_qloop_45)),_mm256_mul_pd(cp_times_delta_dof_2,tmp_qloop_48)),_mm256_mul_pd(cp_times_delta_dof_3,tmp_qloop_42)),_mm256_mul_pd(cp_times_delta_dof_4,tmp_qloop_49)),_mm256_mul_pd(cp_times_delta_dof_5,tmp_qloop_50))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22); + const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22); + const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_41))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_41)))); + const __m256d tmp_qloop_59 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_57),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_58))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_57),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_58)))); + const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_55); + const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_60); + const __m256d tmp_qloop_65 = 
_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_64))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_64)))); + const __m256d tmp_qloop_66 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_65); + const __m256d tmp_qloop_73 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_69),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_69),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_72)))); + const __m256d tmp_qloop_74 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_73); + const __m256d tmp_qloop_78 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_76),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_77))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_76),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_77)))); + const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_78); + const __m256d tmp_qloop_83 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_81),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_82))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_81),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_82)))); + const __m256d tmp_qloop_84 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_83); + const __m256d tmp_qloop_85 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_59); + const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_65,tmp_qloop_85); + const __m256d tmp_qloop_87 = _mm256_mul_pd(tmp_qloop_73,tmp_qloop_85); + const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_78,tmp_qloop_85); + const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_83,tmp_qloop_85); + const __m256d tmp_qloop_90 = 
_mm256_mul_pd(tmp_qloop_55,tmp_qloop_65); + const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_73,tmp_qloop_90); + const __m256d tmp_qloop_92 = _mm256_mul_pd(tmp_qloop_78,tmp_qloop_90); + const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_83,tmp_qloop_90); + const __m256d tmp_qloop_94 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_73); + const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_78,tmp_qloop_94); + const __m256d tmp_qloop_96 = _mm256_mul_pd(tmp_qloop_83,tmp_qloop_94); + const __m256d tmp_qloop_97 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_55,tmp_qloop_78),tmp_qloop_83); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28); + const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11))); + const __m256d hessian_blending_1_0_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36); + const __m256d hessian_blending_1_1_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d q_tmp_0_0 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_54,tmp_qloop_54),tmp_qloop_55); + const __m256d q_tmp_0_1 = tmp_qloop_61; + const __m256d q_tmp_0_2 = tmp_qloop_66; + const __m256d q_tmp_0_3 = tmp_qloop_74; + const __m256d q_tmp_0_4 = tmp_qloop_79; + const __m256d q_tmp_0_5 = tmp_qloop_84; + const __m256d q_tmp_1_0 = tmp_qloop_61; + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_55,_mm256_mul_pd(tmp_qloop_59,tmp_qloop_59)); + const __m256d q_tmp_1_2 = tmp_qloop_86; + const __m256d q_tmp_1_3 = tmp_qloop_87; + const __m256d q_tmp_1_4 = tmp_qloop_88; + const __m256d q_tmp_1_5 = tmp_qloop_89; + const __m256d q_tmp_2_0 = tmp_qloop_66; + const __m256d q_tmp_2_1 = tmp_qloop_86; + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_55,_mm256_mul_pd(tmp_qloop_65,tmp_qloop_65)); + const __m256d q_tmp_2_3 = tmp_qloop_91; + const __m256d q_tmp_2_4 = tmp_qloop_92; + const __m256d q_tmp_2_5 = tmp_qloop_93; + const __m256d q_tmp_3_0 = tmp_qloop_74; + const __m256d q_tmp_3_1 = tmp_qloop_87; + const __m256d q_tmp_3_2 = tmp_qloop_91; + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_55,_mm256_mul_pd(tmp_qloop_73,tmp_qloop_73)); + const __m256d q_tmp_3_4 = 
tmp_qloop_95; + const __m256d q_tmp_3_5 = tmp_qloop_96; + const __m256d q_tmp_4_0 = tmp_qloop_79; + const __m256d q_tmp_4_1 = tmp_qloop_88; + const __m256d q_tmp_4_2 = tmp_qloop_92; + const __m256d q_tmp_4_3 = tmp_qloop_95; + const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_55,_mm256_mul_pd(tmp_qloop_78,tmp_qloop_78)); + const __m256d q_tmp_4_5 = tmp_qloop_97; + const __m256d q_tmp_5_0 = tmp_qloop_84; + const __m256d q_tmp_5_1 = tmp_qloop_89; + const __m256d q_tmp_5_2 = tmp_qloop_93; + const __m256d q_tmp_5_3 = tmp_qloop_96; + const __m256d q_tmp_5_4 = tmp_qloop_97; + const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_55,_mm256_mul_pd(tmp_qloop_83,tmp_qloop_83)); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0); + q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1); + q_acc_4_2 = 
_mm256_add_pd(q_acc_4_2,q_tmp_4_2); + q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3); + q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4); + q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5); + q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0); + q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1); + q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2); + q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3); + q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4); + q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)); + const __m256d elMatVec_4 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5)); + const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5)); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / 
(2))],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; 
+ const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t 
src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / 
(2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + 
real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t 
tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0; + const real_t tmp_qloop_40 = jac_affine_inv_0_0_GRAY*tmp_qloop_39 + jac_affine_inv_1_0_GRAY*tmp_qloop_39; + const real_t tmp_qloop_41 = jac_affine_inv_0_1_GRAY*tmp_qloop_39 + jac_affine_inv_1_1_GRAY*tmp_qloop_39; + const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_44 = tmp_qloop_43*2.0; + const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q]; + const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_47 = tmp_qloop_46*2.0; + const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q]; + const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0; + const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0; + const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0; + const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0; + const real_t tmp_qloop_56 = tmp_qloop_37 - 1.0; + const real_t tmp_qloop_57 = jac_affine_inv_0_0_GRAY*tmp_qloop_56; + const real_t tmp_qloop_58 = jac_affine_inv_0_1_GRAY*tmp_qloop_56; + const real_t tmp_qloop_62 = tmp_qloop_38 - 1.0; + const real_t tmp_qloop_63 = jac_affine_inv_1_0_GRAY*tmp_qloop_62; + const real_t tmp_qloop_64 = jac_affine_inv_1_1_GRAY*tmp_qloop_62; + const real_t tmp_qloop_67 = 
jac_affine_inv_1_0_GRAY*tmp_qloop_37; + const real_t tmp_qloop_68 = jac_affine_inv_0_0_GRAY*tmp_qloop_38; + const real_t tmp_qloop_69 = tmp_qloop_67 + tmp_qloop_68; + const real_t tmp_qloop_70 = jac_affine_inv_1_1_GRAY*tmp_qloop_37; + const real_t tmp_qloop_71 = jac_affine_inv_0_1_GRAY*tmp_qloop_38; + const real_t tmp_qloop_72 = tmp_qloop_70 + tmp_qloop_71; + const real_t tmp_qloop_75 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_76 = jac_affine_inv_1_0_GRAY*tmp_qloop_75 - tmp_qloop_68; + const real_t tmp_qloop_77 = jac_affine_inv_1_1_GRAY*tmp_qloop_75 - tmp_qloop_71; + const real_t tmp_qloop_80 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_81 = jac_affine_inv_0_0_GRAY*tmp_qloop_80 - tmp_qloop_67; + const real_t tmp_qloop_82 = jac_affine_inv_0_1_GRAY*tmp_qloop_80 - tmp_qloop_70; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_55 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(cp_times_delta_dof_0*tmp_qloop_51 + cp_times_delta_dof_1*tmp_qloop_45 + cp_times_delta_dof_2*tmp_qloop_48 + cp_times_delta_dof_3*tmp_qloop_42 + cp_times_delta_dof_4*tmp_qloop_49 + cp_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q]; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = 
jac_blending_0_0*tmp_qloop_22; + const real_t tmp_qloop_54 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41); + const real_t tmp_qloop_59 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_57 + jac_blending_inv_1_0*tmp_qloop_58) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_57 + jac_blending_inv_1_1*tmp_qloop_58); + const real_t tmp_qloop_60 = tmp_qloop_54*tmp_qloop_55; + const real_t tmp_qloop_61 = tmp_qloop_59*tmp_qloop_60; + const real_t tmp_qloop_65 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_64) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_64); + const real_t tmp_qloop_66 = tmp_qloop_60*tmp_qloop_65; + const real_t tmp_qloop_73 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_69 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_69 + jac_blending_inv_1_1*tmp_qloop_72); + const real_t tmp_qloop_74 = tmp_qloop_60*tmp_qloop_73; + const real_t tmp_qloop_78 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_76 + jac_blending_inv_1_0*tmp_qloop_77) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_76 + jac_blending_inv_1_1*tmp_qloop_77); + const real_t tmp_qloop_79 = tmp_qloop_60*tmp_qloop_78; + const real_t tmp_qloop_83 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_81 + jac_blending_inv_1_0*tmp_qloop_82) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_81 + jac_blending_inv_1_1*tmp_qloop_82); + const real_t tmp_qloop_84 = tmp_qloop_60*tmp_qloop_83; + const real_t tmp_qloop_85 = tmp_qloop_55*tmp_qloop_59; + const real_t tmp_qloop_86 = tmp_qloop_65*tmp_qloop_85; + const real_t tmp_qloop_87 = tmp_qloop_73*tmp_qloop_85; + const real_t tmp_qloop_88 = tmp_qloop_78*tmp_qloop_85; + const real_t tmp_qloop_89 = tmp_qloop_83*tmp_qloop_85; + const real_t tmp_qloop_90 = tmp_qloop_55*tmp_qloop_65; + const real_t tmp_qloop_91 = tmp_qloop_73*tmp_qloop_90; + const 
real_t tmp_qloop_92 = tmp_qloop_78*tmp_qloop_90; + const real_t tmp_qloop_93 = tmp_qloop_83*tmp_qloop_90; + const real_t tmp_qloop_94 = tmp_qloop_55*tmp_qloop_73; + const real_t tmp_qloop_95 = tmp_qloop_78*tmp_qloop_94; + const real_t tmp_qloop_96 = tmp_qloop_83*tmp_qloop_94; + const real_t tmp_qloop_97 = tmp_qloop_55*tmp_qloop_78*tmp_qloop_83; + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t q_tmp_0_0 = (tmp_qloop_54*tmp_qloop_54)*tmp_qloop_55; + const real_t q_tmp_0_1 = tmp_qloop_61; + const real_t q_tmp_0_2 = tmp_qloop_66; + const real_t q_tmp_0_3 = tmp_qloop_74; + const real_t q_tmp_0_4 = tmp_qloop_79; + const real_t q_tmp_0_5 = tmp_qloop_84; + const real_t q_tmp_1_0 = tmp_qloop_61; + const real_t q_tmp_1_1 = tmp_qloop_55*(tmp_qloop_59*tmp_qloop_59); + const real_t q_tmp_1_2 = tmp_qloop_86; + const real_t q_tmp_1_3 = tmp_qloop_87; + const real_t q_tmp_1_4 = tmp_qloop_88; + const real_t q_tmp_1_5 = tmp_qloop_89; + 
const real_t q_tmp_2_0 = tmp_qloop_66; + const real_t q_tmp_2_1 = tmp_qloop_86; + const real_t q_tmp_2_2 = tmp_qloop_55*(tmp_qloop_65*tmp_qloop_65); + const real_t q_tmp_2_3 = tmp_qloop_91; + const real_t q_tmp_2_4 = tmp_qloop_92; + const real_t q_tmp_2_5 = tmp_qloop_93; + const real_t q_tmp_3_0 = tmp_qloop_74; + const real_t q_tmp_3_1 = tmp_qloop_87; + const real_t q_tmp_3_2 = tmp_qloop_91; + const real_t q_tmp_3_3 = tmp_qloop_55*(tmp_qloop_73*tmp_qloop_73); + const real_t q_tmp_3_4 = tmp_qloop_95; + const real_t q_tmp_3_5 = tmp_qloop_96; + const real_t q_tmp_4_0 = tmp_qloop_79; + const real_t q_tmp_4_1 = tmp_qloop_88; + const real_t q_tmp_4_2 = tmp_qloop_92; + const real_t q_tmp_4_3 = tmp_qloop_95; + const real_t q_tmp_4_4 = tmp_qloop_55*(tmp_qloop_78*tmp_qloop_78); + const real_t q_tmp_4_5 = tmp_qloop_97; + const real_t q_tmp_5_0 = tmp_qloop_84; + const real_t q_tmp_5_1 = tmp_qloop_89; + const real_t q_tmp_5_2 = tmp_qloop_93; + const real_t q_tmp_5_3 = tmp_qloop_96; + const real_t q_tmp_5_4 = tmp_qloop_97; + const real_t q_tmp_5_5 = tmp_qloop_55*(tmp_qloop_83*tmp_qloop_83); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = 
q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5; + const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5; + const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) 
- (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + } + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + 
tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 
1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d cp_times_delta_dof_0 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d cp_times_delta_dof_1 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d cp_times_delta_dof_2 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d cp_times_delta_dof_3 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d cp_times_delta_dof_4 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d cp_times_delta_dof_5 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d 
ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = 
_mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0); + const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0); + const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1); + const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3); + const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4); + const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5)); + const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5)); + const __m256d tmp_qloop_16 = 
_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)); + const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18); + const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24); + const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)); + const __m256d tmp_qloop_30 = 
_mm256_mul_pd(tmp_qloop_0,tmp_qloop_3); + const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30); + const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3); + const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3)); + const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_38 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_39 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_37),tmp_qloop_38); + const __m256d tmp_qloop_40 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE))); + const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_43 = 
_mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_44); + const __m256d tmp_qloop_46 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_47); + const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38); + const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_37); + const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_42),tmp_qloop_44),tmp_qloop_47); + const __m256d tmp_qloop_52 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,ux_dof_3),_mm256_mul_pd(tmp_qloop_45,ux_dof_1)),_mm256_mul_pd(tmp_qloop_48,ux_dof_2)),_mm256_mul_pd(tmp_qloop_49,ux_dof_4)),_mm256_mul_pd(tmp_qloop_50,ux_dof_5)),_mm256_mul_pd(tmp_qloop_51,ux_dof_0)); + const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,uy_dof_3),_mm256_mul_pd(tmp_qloop_45,uy_dof_1)),_mm256_mul_pd(tmp_qloop_48,uy_dof_2)),_mm256_mul_pd(tmp_qloop_49,uy_dof_4)),_mm256_mul_pd(tmp_qloop_50,uy_dof_5)),_mm256_mul_pd(tmp_qloop_51,uy_dof_0)); + const __m256d tmp_qloop_56 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_37); + const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)); + const __m256d tmp_qloop_58 = _mm256_mul_pd(tmp_qloop_56,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)); + const __m256d tmp_qloop_62 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_38); + const __m256d tmp_qloop_63 = _mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)); + const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)); + const __m256d tmp_qloop_67 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)); + const __m256d tmp_qloop_68 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)); + const __m256d tmp_qloop_69 = _mm256_add_pd(tmp_qloop_67,tmp_qloop_68); + const __m256d tmp_qloop_70 = 
_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)); + const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)); + const __m256d tmp_qloop_72 = _mm256_add_pd(tmp_qloop_70,tmp_qloop_71); + const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_76 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_68,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_75,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE))); + const __m256d tmp_qloop_77 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_71,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_75,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))); + const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_81 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_67,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_80,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE))); + const __m256d tmp_qloop_82 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_80,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))); + const __m256d jac_blending_0_0 = 
_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4)); + const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21); + const __m256d abs_det_jac_blending = tmp_qloop_21; + const __m256d tmp_qloop_55 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_times_delta_dof_0,tmp_qloop_51),_mm256_mul_pd(cp_times_delta_dof_1,tmp_qloop_45)),_mm256_mul_pd(cp_times_delta_dof_2,tmp_qloop_48)),_mm256_mul_pd(cp_times_delta_dof_3,tmp_qloop_42)),_mm256_mul_pd(cp_times_delta_dof_4,tmp_qloop_49)),_mm256_mul_pd(cp_times_delta_dof_5,tmp_qloop_50))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22); + const __m256d jac_blending_inv_0_1 = 
_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22); + const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_41))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_41)))); + const __m256d tmp_qloop_59 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_57),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_58))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_57),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_58)))); + const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_54,tmp_qloop_55); + const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_59,tmp_qloop_60); + const __m256d tmp_qloop_65 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_64))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_64)))); + const __m256d tmp_qloop_66 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_65); + const __m256d tmp_qloop_73 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_69),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_69),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_72)))); + const __m256d tmp_qloop_74 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_73); + const __m256d tmp_qloop_78 = 
_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_76),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_77))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_76),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_77)))); + const __m256d tmp_qloop_79 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_78); + const __m256d tmp_qloop_83 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_81),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_82))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_81),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_82)))); + const __m256d tmp_qloop_84 = _mm256_mul_pd(tmp_qloop_60,tmp_qloop_83); + const __m256d tmp_qloop_85 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_59); + const __m256d tmp_qloop_86 = _mm256_mul_pd(tmp_qloop_65,tmp_qloop_85); + const __m256d tmp_qloop_87 = _mm256_mul_pd(tmp_qloop_73,tmp_qloop_85); + const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_78,tmp_qloop_85); + const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_83,tmp_qloop_85); + const __m256d tmp_qloop_90 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_65); + const __m256d tmp_qloop_91 = _mm256_mul_pd(tmp_qloop_73,tmp_qloop_90); + const __m256d tmp_qloop_92 = _mm256_mul_pd(tmp_qloop_78,tmp_qloop_90); + const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_83,tmp_qloop_90); + const __m256d tmp_qloop_94 = _mm256_mul_pd(tmp_qloop_55,tmp_qloop_73); + const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_78,tmp_qloop_94); + const __m256d tmp_qloop_96 = _mm256_mul_pd(tmp_qloop_83,tmp_qloop_94); + const __m256d tmp_qloop_97 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_55,tmp_qloop_78),tmp_qloop_83); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28); + const __m256d hessian_blending_1_0_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11))); + const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_1_1_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36); + const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d q_tmp_0_0 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_54,tmp_qloop_54),tmp_qloop_55); + const __m256d q_tmp_0_1 = tmp_qloop_61; + const __m256d q_tmp_0_2 = tmp_qloop_66; + const __m256d q_tmp_0_3 = tmp_qloop_74; + const __m256d q_tmp_0_4 = tmp_qloop_79; + const __m256d q_tmp_0_5 = tmp_qloop_84; + const __m256d q_tmp_1_0 = tmp_qloop_61; + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_55,_mm256_mul_pd(tmp_qloop_59,tmp_qloop_59)); + const __m256d q_tmp_1_2 = tmp_qloop_86; + const __m256d q_tmp_1_3 = tmp_qloop_87; + const __m256d q_tmp_1_4 = tmp_qloop_88; + const __m256d q_tmp_1_5 = tmp_qloop_89; + const __m256d q_tmp_2_0 = 
tmp_qloop_66; + const __m256d q_tmp_2_1 = tmp_qloop_86; + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_55,_mm256_mul_pd(tmp_qloop_65,tmp_qloop_65)); + const __m256d q_tmp_2_3 = tmp_qloop_91; + const __m256d q_tmp_2_4 = tmp_qloop_92; + const __m256d q_tmp_2_5 = tmp_qloop_93; + const __m256d q_tmp_3_0 = tmp_qloop_74; + const __m256d q_tmp_3_1 = tmp_qloop_87; + const __m256d q_tmp_3_2 = tmp_qloop_91; + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_55,_mm256_mul_pd(tmp_qloop_73,tmp_qloop_73)); + const __m256d q_tmp_3_4 = tmp_qloop_95; + const __m256d q_tmp_3_5 = tmp_qloop_96; + const __m256d q_tmp_4_0 = tmp_qloop_79; + const __m256d q_tmp_4_1 = tmp_qloop_88; + const __m256d q_tmp_4_2 = tmp_qloop_92; + const __m256d q_tmp_4_3 = tmp_qloop_95; + const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_55,_mm256_mul_pd(tmp_qloop_78,tmp_qloop_78)); + const __m256d q_tmp_4_5 = tmp_qloop_97; + const __m256d q_tmp_5_0 = tmp_qloop_84; + const __m256d q_tmp_5_1 = tmp_qloop_89; + const __m256d q_tmp_5_2 = tmp_qloop_93; + const __m256d q_tmp_5_3 = tmp_qloop_96; + const __m256d q_tmp_5_4 = tmp_qloop_97; + const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_55,_mm256_mul_pd(tmp_qloop_83,tmp_qloop_83)); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = 
_mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0); + q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1); + q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2); + q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3); + q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4); + q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5); + q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0); + q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1); + q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2); + q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3); + q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4); + q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)); + const 
__m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)); + const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5)); + const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5)); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) 
- (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + 
const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + 
const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + 
real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = 
tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0; + const real_t tmp_qloop_40 = jac_affine_inv_0_0_BLUE*tmp_qloop_39 + jac_affine_inv_1_0_BLUE*tmp_qloop_39; + const real_t tmp_qloop_41 = jac_affine_inv_0_1_BLUE*tmp_qloop_39 + jac_affine_inv_1_1_BLUE*tmp_qloop_39; + const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_44 = tmp_qloop_43*2.0; + const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q]; + const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_47 = tmp_qloop_46*2.0; + const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q]; + const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0; + const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0; + const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0; + const real_t tmp_qloop_53 = 
tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0; + const real_t tmp_qloop_56 = tmp_qloop_37 - 1.0; + const real_t tmp_qloop_57 = jac_affine_inv_0_0_BLUE*tmp_qloop_56; + const real_t tmp_qloop_58 = jac_affine_inv_0_1_BLUE*tmp_qloop_56; + const real_t tmp_qloop_62 = tmp_qloop_38 - 1.0; + const real_t tmp_qloop_63 = jac_affine_inv_1_0_BLUE*tmp_qloop_62; + const real_t tmp_qloop_64 = jac_affine_inv_1_1_BLUE*tmp_qloop_62; + const real_t tmp_qloop_67 = jac_affine_inv_1_0_BLUE*tmp_qloop_37; + const real_t tmp_qloop_68 = jac_affine_inv_0_0_BLUE*tmp_qloop_38; + const real_t tmp_qloop_69 = tmp_qloop_67 + tmp_qloop_68; + const real_t tmp_qloop_70 = jac_affine_inv_1_1_BLUE*tmp_qloop_37; + const real_t tmp_qloop_71 = jac_affine_inv_0_1_BLUE*tmp_qloop_38; + const real_t tmp_qloop_72 = tmp_qloop_70 + tmp_qloop_71; + const real_t tmp_qloop_75 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_76 = jac_affine_inv_1_0_BLUE*tmp_qloop_75 - tmp_qloop_68; + const real_t tmp_qloop_77 = jac_affine_inv_1_1_BLUE*tmp_qloop_75 - tmp_qloop_71; + const real_t tmp_qloop_80 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_81 = jac_affine_inv_0_0_BLUE*tmp_qloop_80 - tmp_qloop_67; + const real_t tmp_qloop_82 = jac_affine_inv_0_1_BLUE*tmp_qloop_80 - tmp_qloop_70; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t 
tmp_qloop_55 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(cp_times_delta_dof_0*tmp_qloop_51 + cp_times_delta_dof_1*tmp_qloop_45 + cp_times_delta_dof_2*tmp_qloop_48 + cp_times_delta_dof_3*tmp_qloop_42 + cp_times_delta_dof_4*tmp_qloop_49 + cp_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q]; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t tmp_qloop_54 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41); + const real_t tmp_qloop_59 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_57 + jac_blending_inv_1_0*tmp_qloop_58) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_57 + jac_blending_inv_1_1*tmp_qloop_58); + const real_t tmp_qloop_60 = tmp_qloop_54*tmp_qloop_55; + const real_t tmp_qloop_61 = tmp_qloop_59*tmp_qloop_60; + const real_t tmp_qloop_65 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_64) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_64); + const real_t tmp_qloop_66 = tmp_qloop_60*tmp_qloop_65; + const real_t tmp_qloop_73 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_69 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_69 + jac_blending_inv_1_1*tmp_qloop_72); + const real_t tmp_qloop_74 = tmp_qloop_60*tmp_qloop_73; + const real_t tmp_qloop_78 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_76 + jac_blending_inv_1_0*tmp_qloop_77) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_76 + jac_blending_inv_1_1*tmp_qloop_77); + const real_t tmp_qloop_79 = tmp_qloop_60*tmp_qloop_78; + const real_t tmp_qloop_83 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_81 + jac_blending_inv_1_0*tmp_qloop_82) + 
tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_81 + jac_blending_inv_1_1*tmp_qloop_82); + const real_t tmp_qloop_84 = tmp_qloop_60*tmp_qloop_83; + const real_t tmp_qloop_85 = tmp_qloop_55*tmp_qloop_59; + const real_t tmp_qloop_86 = tmp_qloop_65*tmp_qloop_85; + const real_t tmp_qloop_87 = tmp_qloop_73*tmp_qloop_85; + const real_t tmp_qloop_88 = tmp_qloop_78*tmp_qloop_85; + const real_t tmp_qloop_89 = tmp_qloop_83*tmp_qloop_85; + const real_t tmp_qloop_90 = tmp_qloop_55*tmp_qloop_65; + const real_t tmp_qloop_91 = tmp_qloop_73*tmp_qloop_90; + const real_t tmp_qloop_92 = tmp_qloop_78*tmp_qloop_90; + const real_t tmp_qloop_93 = tmp_qloop_83*tmp_qloop_90; + const real_t tmp_qloop_94 = tmp_qloop_55*tmp_qloop_73; + const real_t tmp_qloop_95 = tmp_qloop_78*tmp_qloop_94; + const real_t tmp_qloop_96 = tmp_qloop_83*tmp_qloop_94; + const real_t tmp_qloop_97 = tmp_qloop_55*tmp_qloop_78*tmp_qloop_83; + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; 
+ const real_t q_tmp_0_0 = (tmp_qloop_54*tmp_qloop_54)*tmp_qloop_55; + const real_t q_tmp_0_1 = tmp_qloop_61; + const real_t q_tmp_0_2 = tmp_qloop_66; + const real_t q_tmp_0_3 = tmp_qloop_74; + const real_t q_tmp_0_4 = tmp_qloop_79; + const real_t q_tmp_0_5 = tmp_qloop_84; + const real_t q_tmp_1_0 = tmp_qloop_61; + const real_t q_tmp_1_1 = tmp_qloop_55*(tmp_qloop_59*tmp_qloop_59); + const real_t q_tmp_1_2 = tmp_qloop_86; + const real_t q_tmp_1_3 = tmp_qloop_87; + const real_t q_tmp_1_4 = tmp_qloop_88; + const real_t q_tmp_1_5 = tmp_qloop_89; + const real_t q_tmp_2_0 = tmp_qloop_66; + const real_t q_tmp_2_1 = tmp_qloop_86; + const real_t q_tmp_2_2 = tmp_qloop_55*(tmp_qloop_65*tmp_qloop_65); + const real_t q_tmp_2_3 = tmp_qloop_91; + const real_t q_tmp_2_4 = tmp_qloop_92; + const real_t q_tmp_2_5 = tmp_qloop_93; + const real_t q_tmp_3_0 = tmp_qloop_74; + const real_t q_tmp_3_1 = tmp_qloop_87; + const real_t q_tmp_3_2 = tmp_qloop_91; + const real_t q_tmp_3_3 = tmp_qloop_55*(tmp_qloop_73*tmp_qloop_73); + const real_t q_tmp_3_4 = tmp_qloop_95; + const real_t q_tmp_3_5 = tmp_qloop_96; + const real_t q_tmp_4_0 = tmp_qloop_79; + const real_t q_tmp_4_1 = tmp_qloop_88; + const real_t q_tmp_4_2 = tmp_qloop_92; + const real_t q_tmp_4_3 = tmp_qloop_95; + const real_t q_tmp_4_4 = tmp_qloop_55*(tmp_qloop_78*tmp_qloop_78); + const real_t q_tmp_4_5 = tmp_qloop_97; + const real_t q_tmp_5_0 = tmp_qloop_84; + const real_t q_tmp_5_1 = tmp_qloop_89; + const real_t q_tmp_5_2 = tmp_qloop_93; + const real_t q_tmp_5_3 = tmp_qloop_96; + const real_t q_tmp_5_4 = tmp_qloop_97; + const real_t q_tmp_5_5 = tmp_qloop_55*(tmp_qloop_83*tmp_qloop_83); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = 
q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5; + const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5; + const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5; + _data_dstVertex[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + } + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/supg_advection/avx/P2ElementwiseSupgAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp b/operators/supg_advection/avx/P2ElementwiseSupgAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp new file mode 
100644 index 0000000000000000000000000000000000000000..3652b354c8920b8aa252f6722b12f4bd3f0db2a6 --- /dev/null +++ b/operators/supg_advection/avx/P2ElementwiseSupgAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp @@ -0,0 +1,761 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. 
+*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseSupgAdvectionAnnulusMap.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseSupgAdvectionAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D( real_t * RESTRICT _data_cp_times_deltaEdge, real_t * RESTRICT _data_cp_times_deltaVertex, real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + 
const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; + const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_8 = -tmp_qloop_7; + const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); + const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; + const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_11*1.0; + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t 
_data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d cp_times_delta_dof_0 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d cp_times_delta_dof_1 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d cp_times_delta_dof_2 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 
1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d cp_times_delta_dof_3 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d cp_times_delta_dof_4 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d cp_times_delta_dof_5 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const 
__m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0); + const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0); + const __m256d tmp_qloop_3 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1); + const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3); + const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4); + const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5)); + const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5)); + const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)); + const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + 
const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18); + const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24); + const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3); + const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30); + const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3); + const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3)); + const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_38 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + 
const __m256d tmp_qloop_39 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_37),tmp_qloop_38); + const __m256d tmp_qloop_40 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY))); + const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_43 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_44); + const __m256d tmp_qloop_46 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_47); + const __m256d tmp_qloop_49 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38); + const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_37); + const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_42),tmp_qloop_44),tmp_qloop_47); + const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,ux_dof_3),_mm256_mul_pd(tmp_qloop_45,ux_dof_1)),_mm256_mul_pd(tmp_qloop_48,ux_dof_2)),_mm256_mul_pd(tmp_qloop_49,ux_dof_4)),_mm256_mul_pd(tmp_qloop_50,ux_dof_5)),_mm256_mul_pd(tmp_qloop_51,ux_dof_0)); + const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,uy_dof_3),_mm256_mul_pd(tmp_qloop_45,uy_dof_1)),_mm256_mul_pd(tmp_qloop_48,uy_dof_2)),_mm256_mul_pd(tmp_qloop_49,uy_dof_4)),_mm256_mul_pd(tmp_qloop_50,uy_dof_5)),_mm256_mul_pd(tmp_qloop_51,uy_dof_0)); + const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_37); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)); + const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)); + const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_38); + const 
__m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_58,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)); + const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_58,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)); + const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)); + const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)); + const __m256d tmp_qloop_63 = _mm256_add_pd(tmp_qloop_61,tmp_qloop_62); + const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)); + const __m256d tmp_qloop_65 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)); + const __m256d tmp_qloop_66 = _mm256_add_pd(tmp_qloop_64,tmp_qloop_65); + const __m256d tmp_qloop_67 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_68 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_67,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY))); + const __m256d tmp_qloop_69 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_67,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))); + const __m256d tmp_qloop_70 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_71 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY))); + const __m256d tmp_qloop_72 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_64,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))); + const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4)); + const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21); + const __m256d abs_det_jac_blending = tmp_qloop_21; + const __m256d tmp_qloop_54 = 
_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_times_delta_dof_0,tmp_qloop_51),_mm256_mul_pd(cp_times_delta_dof_1,tmp_qloop_45)),_mm256_mul_pd(cp_times_delta_dof_2,tmp_qloop_48)),_mm256_mul_pd(cp_times_delta_dof_3,tmp_qloop_42)),_mm256_mul_pd(cp_times_delta_dof_4,tmp_qloop_49)),_mm256_mul_pd(cp_times_delta_dof_5,tmp_qloop_50))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22); + const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28); + const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11))); + const __m256d hessian_blending_1_0_1 
= _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36); + const __m256d hessian_blending_1_1_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_54,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_41))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_41)))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_41))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_41)))))); + const __m256d q_tmp_1_1 = 
_mm256_mul_pd(tmp_qloop_54,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_57))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_57)))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_57))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_57)))))); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_54,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_60))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_59),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_60)))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_60))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_59),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_60)))))); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_54,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_66))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_66)))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_66))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_66)))))); + const __m256d q_tmp_4_4 = 
_mm256_mul_pd(tmp_qloop_54,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_69))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_69)))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_69))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_69)))))); + const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_54,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_71),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_71),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_72)))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_71),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_71),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_72)))))); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4); + q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5); + } + const __m256d elMatDiag_0 = q_acc_0_0; + const __m256d elMatDiag_1 = q_acc_1_1; + const __m256d elMatDiag_2 = q_acc_2_2; + const __m256d elMatDiag_3 = q_acc_3_3; + const __m256d elMatDiag_4 = q_acc_4_4; + const __m256d elMatDiag_5 = q_acc_5_5; + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / 
(2))],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t 
_data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / 
(micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t 
ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + 
p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_39 
= tmp_qloop_37 + tmp_qloop_38 - 3.0; + const real_t tmp_qloop_40 = jac_affine_inv_0_0_GRAY*tmp_qloop_39 + jac_affine_inv_1_0_GRAY*tmp_qloop_39; + const real_t tmp_qloop_41 = jac_affine_inv_0_1_GRAY*tmp_qloop_39 + jac_affine_inv_1_1_GRAY*tmp_qloop_39; + const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_44 = tmp_qloop_43*2.0; + const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q]; + const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_47 = tmp_qloop_46*2.0; + const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q]; + const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0; + const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0; + const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0; + const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0; + const real_t tmp_qloop_55 = tmp_qloop_37 - 1.0; + const real_t tmp_qloop_56 = jac_affine_inv_0_0_GRAY*tmp_qloop_55; + const real_t tmp_qloop_57 = jac_affine_inv_0_1_GRAY*tmp_qloop_55; + const real_t tmp_qloop_58 = tmp_qloop_38 - 1.0; + const real_t tmp_qloop_59 = jac_affine_inv_1_0_GRAY*tmp_qloop_58; + const real_t tmp_qloop_60 = jac_affine_inv_1_1_GRAY*tmp_qloop_58; + const real_t tmp_qloop_61 = jac_affine_inv_1_0_GRAY*tmp_qloop_37; + const real_t tmp_qloop_62 = jac_affine_inv_0_0_GRAY*tmp_qloop_38; + const real_t tmp_qloop_63 = tmp_qloop_61 + tmp_qloop_62; + const real_t tmp_qloop_64 = jac_affine_inv_1_1_GRAY*tmp_qloop_37; + const real_t tmp_qloop_65 = jac_affine_inv_0_1_GRAY*tmp_qloop_38; + const real_t 
tmp_qloop_66 = tmp_qloop_64 + tmp_qloop_65; + const real_t tmp_qloop_67 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_68 = jac_affine_inv_1_0_GRAY*tmp_qloop_67 - tmp_qloop_62; + const real_t tmp_qloop_69 = jac_affine_inv_1_1_GRAY*tmp_qloop_67 - tmp_qloop_65; + const real_t tmp_qloop_70 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_71 = jac_affine_inv_0_0_GRAY*tmp_qloop_70 - tmp_qloop_61; + const real_t tmp_qloop_72 = jac_affine_inv_0_1_GRAY*tmp_qloop_70 - tmp_qloop_64; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_54 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(cp_times_delta_dof_0*tmp_qloop_51 + cp_times_delta_dof_1*tmp_qloop_45 + cp_times_delta_dof_2*tmp_qloop_48 + cp_times_delta_dof_3*tmp_qloop_42 + cp_times_delta_dof_4*tmp_qloop_49 + cp_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q]; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = 
tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t q_tmp_0_0 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41))); + const real_t q_tmp_1_1 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_1_0*tmp_qloop_57) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_1_0*tmp_qloop_57) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57))); + const real_t q_tmp_2_2 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_59 + jac_blending_inv_1_0*tmp_qloop_60) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_59 + jac_blending_inv_1_1*tmp_qloop_60))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_59 + jac_blending_inv_1_0*tmp_qloop_60) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_59 + jac_blending_inv_1_1*tmp_qloop_60))); + const 
real_t q_tmp_3_3 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_66) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_66))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_66) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_66))); + const real_t q_tmp_4_4 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_1_0*tmp_qloop_69) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_1_0*tmp_qloop_69) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69))); + const real_t q_tmp_5_5 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_71 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_71 + jac_blending_inv_1_1*tmp_qloop_72))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_71 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_71 + jac_blending_inv_1_1*tmp_qloop_72))); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatDiag_0 = q_acc_0_0; + const real_t elMatDiag_1 = q_acc_1_1; + const real_t elMatDiag_2 = q_acc_2_2; + const real_t elMatDiag_3 = q_acc_3_3; + const real_t elMatDiag_4 = q_acc_4_4; + const real_t elMatDiag_5 = q_acc_5_5; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + } + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t 
p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = 
(real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d cp_times_delta_dof_0 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d cp_times_delta_dof_1 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d cp_times_delta_dof_2 = _mm256_loadu_pd(& 
_data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d cp_times_delta_dof_3 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d cp_times_delta_dof_4 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d cp_times_delta_dof_5 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d uy_dof_1 = _mm256_loadu_pd(& 
_data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0); + const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0); + const __m256d tmp_qloop_3 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1); + const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3); + const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4); + const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5)); + const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5)); + const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)); + const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + 
const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18); + const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24); + const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3); + const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30); + const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3); + const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3)); + const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_38 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + 
const __m256d tmp_qloop_39 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_37),tmp_qloop_38); + const __m256d tmp_qloop_40 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE))); + const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_43 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_44); + const __m256d tmp_qloop_46 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_47); + const __m256d tmp_qloop_49 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38); + const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_37); + const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_42),tmp_qloop_44),tmp_qloop_47); + const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,ux_dof_3),_mm256_mul_pd(tmp_qloop_45,ux_dof_1)),_mm256_mul_pd(tmp_qloop_48,ux_dof_2)),_mm256_mul_pd(tmp_qloop_49,ux_dof_4)),_mm256_mul_pd(tmp_qloop_50,ux_dof_5)),_mm256_mul_pd(tmp_qloop_51,ux_dof_0)); + const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,uy_dof_3),_mm256_mul_pd(tmp_qloop_45,uy_dof_1)),_mm256_mul_pd(tmp_qloop_48,uy_dof_2)),_mm256_mul_pd(tmp_qloop_49,uy_dof_4)),_mm256_mul_pd(tmp_qloop_50,uy_dof_5)),_mm256_mul_pd(tmp_qloop_51,uy_dof_0)); + const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_37); + const __m256d tmp_qloop_56 = _mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)); + const __m256d tmp_qloop_57 = _mm256_mul_pd(tmp_qloop_55,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)); + const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_38); + const 
__m256d tmp_qloop_59 = _mm256_mul_pd(tmp_qloop_58,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)); + const __m256d tmp_qloop_60 = _mm256_mul_pd(tmp_qloop_58,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)); + const __m256d tmp_qloop_61 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)); + const __m256d tmp_qloop_62 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)); + const __m256d tmp_qloop_63 = _mm256_add_pd(tmp_qloop_61,tmp_qloop_62); + const __m256d tmp_qloop_64 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)); + const __m256d tmp_qloop_65 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)); + const __m256d tmp_qloop_66 = _mm256_add_pd(tmp_qloop_64,tmp_qloop_65); + const __m256d tmp_qloop_67 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_68 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_67,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE))); + const __m256d tmp_qloop_69 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_65,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_67,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))); + const __m256d tmp_qloop_70 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_71 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_61,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE))); + const __m256d tmp_qloop_72 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_64,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))); + const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4)); + const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21); + const __m256d abs_det_jac_blending = tmp_qloop_21; + const __m256d tmp_qloop_54 = 
_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_times_delta_dof_0,tmp_qloop_51),_mm256_mul_pd(cp_times_delta_dof_1,tmp_qloop_45)),_mm256_mul_pd(cp_times_delta_dof_2,tmp_qloop_48)),_mm256_mul_pd(cp_times_delta_dof_3,tmp_qloop_42)),_mm256_mul_pd(cp_times_delta_dof_4,tmp_qloop_49)),_mm256_mul_pd(cp_times_delta_dof_5,tmp_qloop_50))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22); + const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28); + const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11))); + const __m256d hessian_blending_1_0_1 
= _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36); + const __m256d hessian_blending_1_1_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_54,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_41))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_41)))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_41))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_41)))))); + const __m256d q_tmp_1_1 = 
_mm256_mul_pd(tmp_qloop_54,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_57))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_57)))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_57))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_57)))))); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_54,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_60))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_59),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_60)))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_59),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_60))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_59),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_60)))))); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_54,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_66))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_66)))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_66))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_63),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_66)))))); + const __m256d q_tmp_4_4 = 
_mm256_mul_pd(tmp_qloop_54,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_69))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_69)))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_69))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_69)))))); + const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_54,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_71),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_71),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_72)))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_71),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_72))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_71),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_72)))))); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4); + q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5); + } + const __m256d elMatDiag_0 = q_acc_0_0; + const __m256d elMatDiag_1 = q_acc_1_1; + const __m256d elMatDiag_2 = q_acc_2_2; + const __m256d elMatDiag_3 = q_acc_3_3; + const __m256d elMatDiag_4 = q_acc_4_4; + const __m256d elMatDiag_5 = q_acc_5_5; + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 
1],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 
+= 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + 
macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t 
q_acc_1_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = 
tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0; + const real_t tmp_qloop_40 = jac_affine_inv_0_0_BLUE*tmp_qloop_39 + jac_affine_inv_1_0_BLUE*tmp_qloop_39; + const real_t tmp_qloop_41 = jac_affine_inv_0_1_BLUE*tmp_qloop_39 + jac_affine_inv_1_1_BLUE*tmp_qloop_39; + const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_44 = tmp_qloop_43*2.0; + const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q]; + const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_47 = tmp_qloop_46*2.0; + const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q]; + const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0; + const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0; + const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0; + const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0; + const real_t tmp_qloop_55 = tmp_qloop_37 - 1.0; + const real_t tmp_qloop_56 = jac_affine_inv_0_0_BLUE*tmp_qloop_55; + const real_t tmp_qloop_57 = jac_affine_inv_0_1_BLUE*tmp_qloop_55; + const real_t tmp_qloop_58 = tmp_qloop_38 - 1.0; + const real_t tmp_qloop_59 = jac_affine_inv_1_0_BLUE*tmp_qloop_58; + const real_t tmp_qloop_60 = jac_affine_inv_1_1_BLUE*tmp_qloop_58; + const real_t tmp_qloop_61 = jac_affine_inv_1_0_BLUE*tmp_qloop_37; + const real_t tmp_qloop_62 = 
jac_affine_inv_0_0_BLUE*tmp_qloop_38; + const real_t tmp_qloop_63 = tmp_qloop_61 + tmp_qloop_62; + const real_t tmp_qloop_64 = jac_affine_inv_1_1_BLUE*tmp_qloop_37; + const real_t tmp_qloop_65 = jac_affine_inv_0_1_BLUE*tmp_qloop_38; + const real_t tmp_qloop_66 = tmp_qloop_64 + tmp_qloop_65; + const real_t tmp_qloop_67 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_68 = jac_affine_inv_1_0_BLUE*tmp_qloop_67 - tmp_qloop_62; + const real_t tmp_qloop_69 = jac_affine_inv_1_1_BLUE*tmp_qloop_67 - tmp_qloop_65; + const real_t tmp_qloop_70 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_71 = jac_affine_inv_0_0_BLUE*tmp_qloop_70 - tmp_qloop_61; + const real_t tmp_qloop_72 = jac_affine_inv_0_1_BLUE*tmp_qloop_70 - tmp_qloop_64; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_54 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(cp_times_delta_dof_0*tmp_qloop_51 + cp_times_delta_dof_1*tmp_qloop_45 + cp_times_delta_dof_2*tmp_qloop_48 + cp_times_delta_dof_3*tmp_qloop_42 + cp_times_delta_dof_4*tmp_qloop_49 + cp_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q]; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t hessian_blending_0_0_0 = tmp_qloop_14 
- tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t q_tmp_0_0 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41))); + const real_t q_tmp_1_1 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_1_0*tmp_qloop_57) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_1_0*tmp_qloop_57) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57))); + const real_t q_tmp_2_2 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_59 + jac_blending_inv_1_0*tmp_qloop_60) + 
tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_59 + jac_blending_inv_1_1*tmp_qloop_60))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_59 + jac_blending_inv_1_0*tmp_qloop_60) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_59 + jac_blending_inv_1_1*tmp_qloop_60))); + const real_t q_tmp_3_3 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_66) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_66))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_66) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_66))); + const real_t q_tmp_4_4 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_1_0*tmp_qloop_69) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_1_0*tmp_qloop_69) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69))); + const real_t q_tmp_5_5 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_71 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_71 + jac_blending_inv_1_1*tmp_qloop_72))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_71 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_71 + jac_blending_inv_1_1*tmp_qloop_72))); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatDiag_0 = q_acc_0_0; + const real_t elMatDiag_1 = q_acc_1_1; + const real_t elMatDiag_2 = q_acc_2_2; + const real_t elMatDiag_3 = q_acc_3_3; + const real_t elMatDiag_4 = q_acc_4_4; + const real_t elMatDiag_5 = q_acc_5_5; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / 
(2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + } + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/supg_advection/avx/P2ElementwiseSupgAdvection_apply_P2ElementwiseSupgAdvection_macro_2D.cpp b/operators/supg_advection/avx/P2ElementwiseSupgAdvection_apply_P2ElementwiseSupgAdvection_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8a155120548926473fedd0755ac7b832dd89d364 --- 
/dev/null +++ b/operators/supg_advection/avx/P2ElementwiseSupgAdvection_apply_P2ElementwiseSupgAdvection_macro_2D.cpp @@ -0,0 +1,1000 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. 
+*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseSupgAdvection.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseSupgAdvection::apply_P2ElementwiseSupgAdvection_macro_2D( real_t * RESTRICT _data_cp_times_deltaEdge, real_t * RESTRICT _data_cp_times_deltaVertex, real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t 
p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& 
_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d cp_times_delta_dof_0 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d cp_times_delta_dof_1 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d cp_times_delta_dof_2 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d cp_times_delta_dof_3 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d cp_times_delta_dof_4 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d cp_times_delta_dof_5 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d 
q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_1 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_0),tmp_qloop_1); + const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_6 = 
_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_5); + const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_8); + const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_1); + const __m256d tmp_qloop_11 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_0); + const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_3),tmp_qloop_5),tmp_qloop_8); + const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,ux_dof_4),_mm256_mul_pd(tmp_qloop_11,ux_dof_5)),_mm256_mul_pd(tmp_qloop_12,ux_dof_0)),_mm256_mul_pd(tmp_qloop_3,ux_dof_3)),_mm256_mul_pd(tmp_qloop_6,ux_dof_1)),_mm256_mul_pd(tmp_qloop_9,ux_dof_2)); + const __m256d tmp_qloop_14 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,uy_dof_4),_mm256_mul_pd(tmp_qloop_11,uy_dof_5)),_mm256_mul_pd(tmp_qloop_12,uy_dof_0)),_mm256_mul_pd(tmp_qloop_3,uy_dof_3)),_mm256_mul_pd(tmp_qloop_6,uy_dof_1)),_mm256_mul_pd(tmp_qloop_9,uy_dof_2)); + const __m256d tmp_qloop_15 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))))); + const __m256d tmp_qloop_16 = _mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_times_delta_dof_0,tmp_qloop_12),_mm256_mul_pd(cp_times_delta_dof_1,tmp_qloop_6)),_mm256_mul_pd(cp_times_delta_dof_2,tmp_qloop_9)),_mm256_mul_pd(cp_times_delta_dof_3,tmp_qloop_3)),_mm256_mul_pd(cp_times_delta_dof_4,tmp_qloop_10)),_mm256_mul_pd(cp_times_delta_dof_5,tmp_qloop_11)),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)); + const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_0); + const __m256d tmp_qloop_18 = 
_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16); + const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_19); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_1); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_21),_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_21),_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))); + const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_22); + const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)); + const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)); + const __m256d tmp_qloop_26 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)); + const __m256d tmp_qloop_27 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(tmp_qloop_24,tmp_qloop_25)),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(tmp_qloop_26,tmp_qloop_27))); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_28); + const __m256d 
tmp_qloop_30 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))))); + const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_31); + const __m256d tmp_qloop_33 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))))); + const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_34); + const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_18); + const __m256d tmp_qloop_37 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_36); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_36); + const __m256d 
tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_36); + const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_36); + const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_22); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_41); + const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_41); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_41); + const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_28); + const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_45); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_45); + const __m256d tmp_qloop_48 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,tmp_qloop_31),tmp_qloop_34); + const __m256d q_tmp_0_0 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_15),tmp_qloop_16); + const __m256d q_tmp_0_1 = tmp_qloop_20; + const __m256d q_tmp_0_2 = tmp_qloop_23; + const __m256d q_tmp_0_3 = tmp_qloop_29; + const __m256d q_tmp_0_4 = tmp_qloop_32; + const __m256d q_tmp_0_5 = tmp_qloop_35; + const __m256d q_tmp_1_0 = tmp_qloop_20; + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_16,_mm256_mul_pd(tmp_qloop_18,tmp_qloop_18)); + const __m256d q_tmp_1_2 = tmp_qloop_37; + const __m256d q_tmp_1_3 = tmp_qloop_38; + const __m256d q_tmp_1_4 = tmp_qloop_39; + const __m256d q_tmp_1_5 = tmp_qloop_40; + const __m256d q_tmp_2_0 = tmp_qloop_23; + const __m256d q_tmp_2_1 = tmp_qloop_37; + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_16,_mm256_mul_pd(tmp_qloop_22,tmp_qloop_22)); + const __m256d q_tmp_2_3 = tmp_qloop_42; + const __m256d q_tmp_2_4 = tmp_qloop_43; + const __m256d q_tmp_2_5 = tmp_qloop_44; + const __m256d q_tmp_3_0 = tmp_qloop_29; + const __m256d q_tmp_3_1 = tmp_qloop_38; + const __m256d q_tmp_3_2 = tmp_qloop_42; + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_16,_mm256_mul_pd(tmp_qloop_28,tmp_qloop_28)); + const __m256d q_tmp_3_4 = tmp_qloop_46; + const __m256d q_tmp_3_5 = tmp_qloop_47; + const __m256d q_tmp_4_0 
= tmp_qloop_32; + const __m256d q_tmp_4_1 = tmp_qloop_39; + const __m256d q_tmp_4_2 = tmp_qloop_43; + const __m256d q_tmp_4_3 = tmp_qloop_46; + const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_16,_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31)); + const __m256d q_tmp_4_5 = tmp_qloop_48; + const __m256d q_tmp_5_0 = tmp_qloop_35; + const __m256d q_tmp_5_1 = tmp_qloop_40; + const __m256d q_tmp_5_2 = tmp_qloop_44; + const __m256d q_tmp_5_3 = tmp_qloop_47; + const __m256d q_tmp_5_4 = tmp_qloop_48; + const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_16,_mm256_mul_pd(tmp_qloop_34,tmp_qloop_34)); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0); + q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1); + q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2); + q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3); + 
q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4); + q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5); + q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0); + q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1); + q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2); + q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3); + q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4); + q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)); + const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5)); + const __m256d 
elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5)); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / 
(2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + 
macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 
2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_5 = 
_data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + 
const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0; + const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q]; + const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_8 = tmp_qloop_7*2.0; + const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0; + const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0; + const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2; + const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2; + const real_t tmp_qloop_15 = tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_0_GRAY*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_2); + const real_t tmp_qloop_16 = abs_det_jac_affine_GRAY*(cp_times_delta_dof_0*tmp_qloop_12 + cp_times_delta_dof_1*tmp_qloop_6 + cp_times_delta_dof_2*tmp_qloop_9 + cp_times_delta_dof_3*tmp_qloop_3 + cp_times_delta_dof_4*tmp_qloop_10 + cp_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q]; + const real_t tmp_qloop_17 = tmp_qloop_0 - 1.0; + const real_t tmp_qloop_18 = jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_17; + const real_t tmp_qloop_19 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_20 = tmp_qloop_18*tmp_qloop_19; + const real_t tmp_qloop_21 = tmp_qloop_1 - 1.0; + const real_t tmp_qloop_22 = 
jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_21 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_21; + const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_22; + const real_t tmp_qloop_24 = jac_affine_inv_1_0_GRAY*tmp_qloop_0; + const real_t tmp_qloop_25 = jac_affine_inv_0_0_GRAY*tmp_qloop_1; + const real_t tmp_qloop_26 = jac_affine_inv_1_1_GRAY*tmp_qloop_0; + const real_t tmp_qloop_27 = jac_affine_inv_0_1_GRAY*tmp_qloop_1; + const real_t tmp_qloop_28 = tmp_qloop_13*(tmp_qloop_24 + tmp_qloop_25) + tmp_qloop_14*(tmp_qloop_26 + tmp_qloop_27); + const real_t tmp_qloop_29 = tmp_qloop_19*tmp_qloop_28; + const real_t tmp_qloop_30 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_31 = tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_30 - tmp_qloop_25) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_30 - tmp_qloop_27); + const real_t tmp_qloop_32 = tmp_qloop_19*tmp_qloop_31; + const real_t tmp_qloop_33 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_34 = tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_33 - tmp_qloop_24) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_33 - tmp_qloop_26); + const real_t tmp_qloop_35 = tmp_qloop_19*tmp_qloop_34; + const real_t tmp_qloop_36 = tmp_qloop_16*tmp_qloop_18; + const real_t tmp_qloop_37 = tmp_qloop_22*tmp_qloop_36; + const real_t tmp_qloop_38 = tmp_qloop_28*tmp_qloop_36; + const real_t tmp_qloop_39 = tmp_qloop_31*tmp_qloop_36; + const real_t tmp_qloop_40 = tmp_qloop_34*tmp_qloop_36; + const real_t tmp_qloop_41 = tmp_qloop_16*tmp_qloop_22; + const real_t tmp_qloop_42 = tmp_qloop_28*tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_31*tmp_qloop_41; + const real_t tmp_qloop_44 = tmp_qloop_34*tmp_qloop_41; + const real_t tmp_qloop_45 = tmp_qloop_16*tmp_qloop_28; + const real_t tmp_qloop_46 = tmp_qloop_31*tmp_qloop_45; + const real_t tmp_qloop_47 = tmp_qloop_34*tmp_qloop_45; + const real_t tmp_qloop_48 = tmp_qloop_16*tmp_qloop_31*tmp_qloop_34; + const real_t q_tmp_0_0 = 
(tmp_qloop_15*tmp_qloop_15)*tmp_qloop_16; + const real_t q_tmp_0_1 = tmp_qloop_20; + const real_t q_tmp_0_2 = tmp_qloop_23; + const real_t q_tmp_0_3 = tmp_qloop_29; + const real_t q_tmp_0_4 = tmp_qloop_32; + const real_t q_tmp_0_5 = tmp_qloop_35; + const real_t q_tmp_1_0 = tmp_qloop_20; + const real_t q_tmp_1_1 = tmp_qloop_16*(tmp_qloop_18*tmp_qloop_18); + const real_t q_tmp_1_2 = tmp_qloop_37; + const real_t q_tmp_1_3 = tmp_qloop_38; + const real_t q_tmp_1_4 = tmp_qloop_39; + const real_t q_tmp_1_5 = tmp_qloop_40; + const real_t q_tmp_2_0 = tmp_qloop_23; + const real_t q_tmp_2_1 = tmp_qloop_37; + const real_t q_tmp_2_2 = tmp_qloop_16*(tmp_qloop_22*tmp_qloop_22); + const real_t q_tmp_2_3 = tmp_qloop_42; + const real_t q_tmp_2_4 = tmp_qloop_43; + const real_t q_tmp_2_5 = tmp_qloop_44; + const real_t q_tmp_3_0 = tmp_qloop_29; + const real_t q_tmp_3_1 = tmp_qloop_38; + const real_t q_tmp_3_2 = tmp_qloop_42; + const real_t q_tmp_3_3 = tmp_qloop_16*(tmp_qloop_28*tmp_qloop_28); + const real_t q_tmp_3_4 = tmp_qloop_46; + const real_t q_tmp_3_5 = tmp_qloop_47; + const real_t q_tmp_4_0 = tmp_qloop_32; + const real_t q_tmp_4_1 = tmp_qloop_39; + const real_t q_tmp_4_2 = tmp_qloop_43; + const real_t q_tmp_4_3 = tmp_qloop_46; + const real_t q_tmp_4_4 = tmp_qloop_16*(tmp_qloop_31*tmp_qloop_31); + const real_t q_tmp_4_5 = tmp_qloop_48; + const real_t q_tmp_5_0 = tmp_qloop_35; + const real_t q_tmp_5_1 = tmp_qloop_40; + const real_t q_tmp_5_2 = tmp_qloop_44; + const real_t q_tmp_5_3 = tmp_qloop_47; + const real_t q_tmp_5_4 = tmp_qloop_48; + const real_t q_tmp_5_5 = tmp_qloop_16*(tmp_qloop_34*tmp_qloop_34); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 
= q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5; + const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5; + const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / 
(2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + } + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const 
real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + 
_data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 
1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d cp_times_delta_dof_0 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d cp_times_delta_dof_1 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d cp_times_delta_dof_2 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d cp_times_delta_dof_3 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d cp_times_delta_dof_4 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d cp_times_delta_dof_5 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d 
ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = 
_mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = 
_mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_1 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_0),tmp_qloop_1); + const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_5); + const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_8); + const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_1); + const __m256d tmp_qloop_11 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_0); + const __m256d tmp_qloop_12 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_3),tmp_qloop_5),tmp_qloop_8); + const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,ux_dof_4),_mm256_mul_pd(tmp_qloop_11,ux_dof_5)),_mm256_mul_pd(tmp_qloop_12,ux_dof_0)),_mm256_mul_pd(tmp_qloop_3,ux_dof_3)),_mm256_mul_pd(tmp_qloop_6,ux_dof_1)),_mm256_mul_pd(tmp_qloop_9,ux_dof_2)); + const __m256d tmp_qloop_14 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,uy_dof_4),_mm256_mul_pd(tmp_qloop_11,uy_dof_5)),_mm256_mul_pd(tmp_qloop_12,uy_dof_0)),_mm256_mul_pd(tmp_qloop_3,uy_dof_3)),_mm256_mul_pd(tmp_qloop_6,uy_dof_1)),_mm256_mul_pd(tmp_qloop_9,uy_dof_2)); + const __m256d tmp_qloop_15 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))))); + const __m256d tmp_qloop_16 = 
_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_times_delta_dof_0,tmp_qloop_12),_mm256_mul_pd(cp_times_delta_dof_1,tmp_qloop_6)),_mm256_mul_pd(cp_times_delta_dof_2,tmp_qloop_9)),_mm256_mul_pd(cp_times_delta_dof_3,tmp_qloop_3)),_mm256_mul_pd(cp_times_delta_dof_4,tmp_qloop_10)),_mm256_mul_pd(cp_times_delta_dof_5,tmp_qloop_11)),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)); + const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_0); + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16); + const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_18,tmp_qloop_19); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_1); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_21),_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_21),_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))); + const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_22); + const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)); + const __m256d tmp_qloop_25 = 
_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)); + const __m256d tmp_qloop_26 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)); + const __m256d tmp_qloop_27 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(tmp_qloop_24,tmp_qloop_25)),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(tmp_qloop_26,tmp_qloop_27))); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_28); + const __m256d tmp_qloop_30 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_31 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))))); + const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_31); + const __m256d tmp_qloop_33 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_34 = 
_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))))); + const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_19,tmp_qloop_34); + const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_18); + const __m256d tmp_qloop_37 = _mm256_mul_pd(tmp_qloop_22,tmp_qloop_36); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_36); + const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_36); + const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_36); + const __m256d tmp_qloop_41 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_22); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_28,tmp_qloop_41); + const __m256d tmp_qloop_43 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_41); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_41); + const __m256d tmp_qloop_45 = _mm256_mul_pd(tmp_qloop_16,tmp_qloop_28); + const __m256d tmp_qloop_46 = _mm256_mul_pd(tmp_qloop_31,tmp_qloop_45); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_34,tmp_qloop_45); + const __m256d tmp_qloop_48 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,tmp_qloop_31),tmp_qloop_34); + const __m256d q_tmp_0_0 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_15),tmp_qloop_16); + const __m256d q_tmp_0_1 = tmp_qloop_20; + const __m256d q_tmp_0_2 = tmp_qloop_23; + const __m256d q_tmp_0_3 = tmp_qloop_29; + const __m256d q_tmp_0_4 = tmp_qloop_32; + const __m256d q_tmp_0_5 = tmp_qloop_35; + const __m256d q_tmp_1_0 = tmp_qloop_20; + const __m256d q_tmp_1_1 = 
_mm256_mul_pd(tmp_qloop_16,_mm256_mul_pd(tmp_qloop_18,tmp_qloop_18)); + const __m256d q_tmp_1_2 = tmp_qloop_37; + const __m256d q_tmp_1_3 = tmp_qloop_38; + const __m256d q_tmp_1_4 = tmp_qloop_39; + const __m256d q_tmp_1_5 = tmp_qloop_40; + const __m256d q_tmp_2_0 = tmp_qloop_23; + const __m256d q_tmp_2_1 = tmp_qloop_37; + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_16,_mm256_mul_pd(tmp_qloop_22,tmp_qloop_22)); + const __m256d q_tmp_2_3 = tmp_qloop_42; + const __m256d q_tmp_2_4 = tmp_qloop_43; + const __m256d q_tmp_2_5 = tmp_qloop_44; + const __m256d q_tmp_3_0 = tmp_qloop_29; + const __m256d q_tmp_3_1 = tmp_qloop_38; + const __m256d q_tmp_3_2 = tmp_qloop_42; + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_16,_mm256_mul_pd(tmp_qloop_28,tmp_qloop_28)); + const __m256d q_tmp_3_4 = tmp_qloop_46; + const __m256d q_tmp_3_5 = tmp_qloop_47; + const __m256d q_tmp_4_0 = tmp_qloop_32; + const __m256d q_tmp_4_1 = tmp_qloop_39; + const __m256d q_tmp_4_2 = tmp_qloop_43; + const __m256d q_tmp_4_3 = tmp_qloop_46; + const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_16,_mm256_mul_pd(tmp_qloop_31,tmp_qloop_31)); + const __m256d q_tmp_4_5 = tmp_qloop_48; + const __m256d q_tmp_5_0 = tmp_qloop_35; + const __m256d q_tmp_5_1 = tmp_qloop_40; + const __m256d q_tmp_5_2 = tmp_qloop_44; + const __m256d q_tmp_5_3 = tmp_qloop_47; + const __m256d q_tmp_5_4 = tmp_qloop_48; + const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_16,_mm256_mul_pd(tmp_qloop_34,tmp_qloop_34)); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = 
_mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0); + q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1); + q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2); + q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3); + q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4); + q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5); + q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0); + q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1); + q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2); + q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3); + q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4); + q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)); + const __m256d elMatVec_2 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)); + const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5)); + const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5)); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 
1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / 
(micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 
+ 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - 
((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0; + const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q]; + const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_8 = tmp_qloop_7*2.0; + const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0; + const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0; + const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + 
tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2; + const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2; + const real_t tmp_qloop_15 = tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_2 + jac_affine_inv_1_0_BLUE*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_2 + jac_affine_inv_1_1_BLUE*tmp_qloop_2); + const real_t tmp_qloop_16 = abs_det_jac_affine_BLUE*(cp_times_delta_dof_0*tmp_qloop_12 + cp_times_delta_dof_1*tmp_qloop_6 + cp_times_delta_dof_2*tmp_qloop_9 + cp_times_delta_dof_3*tmp_qloop_3 + cp_times_delta_dof_4*tmp_qloop_10 + cp_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q]; + const real_t tmp_qloop_17 = tmp_qloop_0 - 1.0; + const real_t tmp_qloop_18 = jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_17; + const real_t tmp_qloop_19 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_20 = tmp_qloop_18*tmp_qloop_19; + const real_t tmp_qloop_21 = tmp_qloop_1 - 1.0; + const real_t tmp_qloop_22 = jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_21 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_21; + const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_22; + const real_t tmp_qloop_24 = jac_affine_inv_1_0_BLUE*tmp_qloop_0; + const real_t tmp_qloop_25 = jac_affine_inv_0_0_BLUE*tmp_qloop_1; + const real_t tmp_qloop_26 = jac_affine_inv_1_1_BLUE*tmp_qloop_0; + const real_t tmp_qloop_27 = jac_affine_inv_0_1_BLUE*tmp_qloop_1; + const real_t tmp_qloop_28 = tmp_qloop_13*(tmp_qloop_24 + tmp_qloop_25) + tmp_qloop_14*(tmp_qloop_26 + tmp_qloop_27); + const real_t tmp_qloop_29 = tmp_qloop_19*tmp_qloop_28; + const real_t tmp_qloop_30 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_31 = tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_30 - tmp_qloop_25) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_30 - tmp_qloop_27); + 
const real_t tmp_qloop_32 = tmp_qloop_19*tmp_qloop_31; + const real_t tmp_qloop_33 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_34 = tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_33 - tmp_qloop_24) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_33 - tmp_qloop_26); + const real_t tmp_qloop_35 = tmp_qloop_19*tmp_qloop_34; + const real_t tmp_qloop_36 = tmp_qloop_16*tmp_qloop_18; + const real_t tmp_qloop_37 = tmp_qloop_22*tmp_qloop_36; + const real_t tmp_qloop_38 = tmp_qloop_28*tmp_qloop_36; + const real_t tmp_qloop_39 = tmp_qloop_31*tmp_qloop_36; + const real_t tmp_qloop_40 = tmp_qloop_34*tmp_qloop_36; + const real_t tmp_qloop_41 = tmp_qloop_16*tmp_qloop_22; + const real_t tmp_qloop_42 = tmp_qloop_28*tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_31*tmp_qloop_41; + const real_t tmp_qloop_44 = tmp_qloop_34*tmp_qloop_41; + const real_t tmp_qloop_45 = tmp_qloop_16*tmp_qloop_28; + const real_t tmp_qloop_46 = tmp_qloop_31*tmp_qloop_45; + const real_t tmp_qloop_47 = tmp_qloop_34*tmp_qloop_45; + const real_t tmp_qloop_48 = tmp_qloop_16*tmp_qloop_31*tmp_qloop_34; + const real_t q_tmp_0_0 = (tmp_qloop_15*tmp_qloop_15)*tmp_qloop_16; + const real_t q_tmp_0_1 = tmp_qloop_20; + const real_t q_tmp_0_2 = tmp_qloop_23; + const real_t q_tmp_0_3 = tmp_qloop_29; + const real_t q_tmp_0_4 = tmp_qloop_32; + const real_t q_tmp_0_5 = tmp_qloop_35; + const real_t q_tmp_1_0 = tmp_qloop_20; + const real_t q_tmp_1_1 = tmp_qloop_16*(tmp_qloop_18*tmp_qloop_18); + const real_t q_tmp_1_2 = tmp_qloop_37; + const real_t q_tmp_1_3 = tmp_qloop_38; + const real_t q_tmp_1_4 = tmp_qloop_39; + const real_t q_tmp_1_5 = tmp_qloop_40; + const real_t q_tmp_2_0 = tmp_qloop_23; + const real_t q_tmp_2_1 = tmp_qloop_37; + const real_t q_tmp_2_2 = tmp_qloop_16*(tmp_qloop_22*tmp_qloop_22); + const real_t q_tmp_2_3 = tmp_qloop_42; + const real_t q_tmp_2_4 = tmp_qloop_43; + const real_t q_tmp_2_5 = tmp_qloop_44; + const real_t q_tmp_3_0 = tmp_qloop_29; + const real_t q_tmp_3_1 = 
tmp_qloop_38; + const real_t q_tmp_3_2 = tmp_qloop_42; + const real_t q_tmp_3_3 = tmp_qloop_16*(tmp_qloop_28*tmp_qloop_28); + const real_t q_tmp_3_4 = tmp_qloop_46; + const real_t q_tmp_3_5 = tmp_qloop_47; + const real_t q_tmp_4_0 = tmp_qloop_32; + const real_t q_tmp_4_1 = tmp_qloop_39; + const real_t q_tmp_4_2 = tmp_qloop_43; + const real_t q_tmp_4_3 = tmp_qloop_46; + const real_t q_tmp_4_4 = tmp_qloop_16*(tmp_qloop_31*tmp_qloop_31); + const real_t q_tmp_4_5 = tmp_qloop_48; + const real_t q_tmp_5_0 = tmp_qloop_35; + const real_t q_tmp_5_1 = tmp_qloop_40; + const real_t q_tmp_5_2 = tmp_qloop_44; + const real_t q_tmp_5_3 = tmp_qloop_47; + const real_t q_tmp_5_4 = tmp_qloop_48; + const real_t q_tmp_5_5 = tmp_qloop_16*(tmp_qloop_34*tmp_qloop_34); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 
+ q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5; + const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5; + const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstEdge[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + } + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/supg_advection/avx/P2ElementwiseSupgAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvection_macro_2D.cpp b/operators/supg_advection/avx/P2ElementwiseSupgAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvection_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..769d0c05e9bb3f1b0ffc378448fca4a708ec31e2 --- /dev/null +++ b/operators/supg_advection/avx/P2ElementwiseSupgAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvection_macro_2D.cpp @@ -0,0 +1,516 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. 
+* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseSupgAdvection.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseSupgAdvection::computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvection_macro_2D( real_t * RESTRICT _data_cp_times_deltaEdge, real_t * RESTRICT _data_cp_times_deltaVertex, real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = 
macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + 
_data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& 
_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d cp_times_delta_dof_0 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d cp_times_delta_dof_1 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d cp_times_delta_dof_2 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d cp_times_delta_dof_3 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d cp_times_delta_dof_4 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d cp_times_delta_dof_5 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / 
(2))]); + const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d uy_dof_1 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); 
+ __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_1 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_0),tmp_qloop_1); + const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_5); + const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_8); + const __m256d tmp_qloop_10 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_1); + const __m256d tmp_qloop_11 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_0); + const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_3),tmp_qloop_5),tmp_qloop_8); + const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,ux_dof_4),_mm256_mul_pd(tmp_qloop_11,ux_dof_5)),_mm256_mul_pd(tmp_qloop_12,ux_dof_0)),_mm256_mul_pd(tmp_qloop_3,ux_dof_3)),_mm256_mul_pd(tmp_qloop_6,ux_dof_1)),_mm256_mul_pd(tmp_qloop_9,ux_dof_2)); + const __m256d tmp_qloop_14 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,uy_dof_4),_mm256_mul_pd(tmp_qloop_11,uy_dof_5)),_mm256_mul_pd(tmp_qloop_12,uy_dof_0)),_mm256_mul_pd(tmp_qloop_3,uy_dof_3)),_mm256_mul_pd(tmp_qloop_6,uy_dof_1)),_mm256_mul_pd(tmp_qloop_9,uy_dof_2)); + const __m256d tmp_qloop_15 = 
_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_times_delta_dof_0,tmp_qloop_12),_mm256_mul_pd(cp_times_delta_dof_1,tmp_qloop_6)),_mm256_mul_pd(cp_times_delta_dof_2,tmp_qloop_9)),_mm256_mul_pd(cp_times_delta_dof_3,tmp_qloop_3)),_mm256_mul_pd(cp_times_delta_dof_4,tmp_qloop_10)),_mm256_mul_pd(cp_times_delta_dof_5,tmp_qloop_11)),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)); + const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_0); + const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_1); + const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)); + const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)); + const __m256d tmp_qloop_21 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_23 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_15,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))))))); + const __m256d q_tmp_1_1 = 
_mm256_mul_pd(tmp_qloop_15,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_16),_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_16),_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_16),_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_16),_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))))); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_15,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))))); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_15,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(tmp_qloop_18,tmp_qloop_19)),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(tmp_qloop_20,tmp_qloop_21))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(tmp_qloop_18,tmp_qloop_19)),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(tmp_qloop_20,tmp_qloop_21))))); + const __m256d q_tmp_4_4 = 
_mm256_mul_pd(tmp_qloop_15,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))))))); + const __m256d q_tmp_5_5 = 
_mm256_mul_pd(tmp_qloop_15,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))))))); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4); + q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5); + } + const __m256d elMatDiag_0 = q_acc_0_0; + const __m256d elMatDiag_1 = q_acc_1_1; + const __m256d elMatDiag_2 = q_acc_2_2; + const __m256d elMatDiag_3 = q_acc_3_3; + const __m256d elMatDiag_4 = q_acc_4_4; + const __m256d elMatDiag_5 = q_acc_5_5; + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 
1],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + 
real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + 
macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 
1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0; + const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 
= tmp_qloop_5 - _data_q_p_0[q]; + const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_8 = tmp_qloop_7*2.0; + const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0; + const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0; + const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2; + const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2; + const real_t tmp_qloop_15 = abs_det_jac_affine_GRAY*(cp_times_delta_dof_0*tmp_qloop_12 + cp_times_delta_dof_1*tmp_qloop_6 + cp_times_delta_dof_2*tmp_qloop_9 + cp_times_delta_dof_3*tmp_qloop_3 + cp_times_delta_dof_4*tmp_qloop_10 + cp_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q]; + const real_t tmp_qloop_16 = tmp_qloop_0 - 1.0; + const real_t tmp_qloop_17 = tmp_qloop_1 - 1.0; + const real_t tmp_qloop_18 = jac_affine_inv_1_0_GRAY*tmp_qloop_0; + const real_t tmp_qloop_19 = jac_affine_inv_0_0_GRAY*tmp_qloop_1; + const real_t tmp_qloop_20 = jac_affine_inv_1_1_GRAY*tmp_qloop_0; + const real_t tmp_qloop_21 = jac_affine_inv_0_1_GRAY*tmp_qloop_1; + const real_t tmp_qloop_22 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_23 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0; + const real_t q_tmp_0_0 = tmp_qloop_15*((tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_0_GRAY*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_2))*(tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_0_GRAY*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_2 + 
jac_affine_inv_1_1_GRAY*tmp_qloop_2))); + const real_t q_tmp_1_1 = tmp_qloop_15*((jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_16 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_16)*(jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_16 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_16)); + const real_t q_tmp_2_2 = tmp_qloop_15*((jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_17)*(jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_17)); + const real_t q_tmp_3_3 = tmp_qloop_15*((tmp_qloop_13*(tmp_qloop_18 + tmp_qloop_19) + tmp_qloop_14*(tmp_qloop_20 + tmp_qloop_21))*(tmp_qloop_13*(tmp_qloop_18 + tmp_qloop_19) + tmp_qloop_14*(tmp_qloop_20 + tmp_qloop_21))); + const real_t q_tmp_4_4 = tmp_qloop_15*((tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_22 - tmp_qloop_19) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_22 - tmp_qloop_21))*(tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_22 - tmp_qloop_19) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_22 - tmp_qloop_21))); + const real_t q_tmp_5_5 = tmp_qloop_15*((tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_23 - tmp_qloop_18) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_23 - tmp_qloop_20))*(tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_23 - tmp_qloop_18) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_23 - tmp_qloop_20))); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatDiag_0 = q_acc_0_0; + const real_t elMatDiag_1 = q_acc_1_1; + const real_t elMatDiag_2 = q_acc_2_2; + const real_t elMatDiag_3 = q_acc_3_3; + const real_t elMatDiag_4 = q_acc_4_4; + const real_t elMatDiag_5 = q_acc_5_5; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + 
_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + } + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + 
macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + 
_data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d cp_times_delta_dof_0 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d cp_times_delta_dof_1 = _mm256_loadu_pd(& _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d cp_times_delta_dof_2 = _mm256_loadu_pd(& 
_data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d cp_times_delta_dof_3 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d cp_times_delta_dof_4 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d cp_times_delta_dof_5 = _mm256_loadu_pd(& _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d ux_dof_0 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d ux_dof_1 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d ux_dof_2 = _mm256_loadu_pd(& _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d ux_dof_3 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d ux_dof_4 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d ux_dof_5 = _mm256_loadu_pd(& _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d uy_dof_0 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d uy_dof_1 = _mm256_loadu_pd(& 
_data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d uy_dof_2 = _mm256_loadu_pd(& _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d uy_dof_3 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d uy_dof_4 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d uy_dof_5 = _mm256_loadu_pd(& _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_1 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_0),tmp_qloop_1); + const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_5 = 
_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_5); + const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_8); + const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_1); + const __m256d tmp_qloop_11 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_0); + const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_3),tmp_qloop_5),tmp_qloop_8); + const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,ux_dof_4),_mm256_mul_pd(tmp_qloop_11,ux_dof_5)),_mm256_mul_pd(tmp_qloop_12,ux_dof_0)),_mm256_mul_pd(tmp_qloop_3,ux_dof_3)),_mm256_mul_pd(tmp_qloop_6,ux_dof_1)),_mm256_mul_pd(tmp_qloop_9,ux_dof_2)); + const __m256d tmp_qloop_14 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,uy_dof_4),_mm256_mul_pd(tmp_qloop_11,uy_dof_5)),_mm256_mul_pd(tmp_qloop_12,uy_dof_0)),_mm256_mul_pd(tmp_qloop_3,uy_dof_3)),_mm256_mul_pd(tmp_qloop_6,uy_dof_1)),_mm256_mul_pd(tmp_qloop_9,uy_dof_2)); + const __m256d tmp_qloop_15 = _mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(cp_times_delta_dof_0,tmp_qloop_12),_mm256_mul_pd(cp_times_delta_dof_1,tmp_qloop_6)),_mm256_mul_pd(cp_times_delta_dof_2,tmp_qloop_9)),_mm256_mul_pd(cp_times_delta_dof_3,tmp_qloop_3)),_mm256_mul_pd(cp_times_delta_dof_4,tmp_qloop_10)),_mm256_mul_pd(cp_times_delta_dof_5,tmp_qloop_11)),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)); + const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_0); + const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_1); + const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)); + const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)); + const __m256d tmp_qloop_21 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)); + const __m256d tmp_qloop_22 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_15,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))))))); + const __m256d q_tmp_1_1 = 
_mm256_mul_pd(tmp_qloop_15,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_16),_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_16),_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_16),_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_16),_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))))); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_15,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_17),_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))))); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_15,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(tmp_qloop_18,tmp_qloop_19)),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(tmp_qloop_20,tmp_qloop_21))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(tmp_qloop_18,tmp_qloop_19)),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(tmp_qloop_20,tmp_qloop_21))))); + const __m256d q_tmp_4_4 = 
_mm256_mul_pd(tmp_qloop_15,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_22,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))))))); + const __m256d q_tmp_5_5 = 
_mm256_mul_pd(tmp_qloop_15,_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))))),_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_18,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))))))); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4); + q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5); + } + const __m256d elMatDiag_0 = q_acc_0_0; + const __m256d elMatDiag_1 = q_acc_1_1; + const __m256d elMatDiag_2 = q_acc_2_2; + const __m256d elMatDiag_3 = q_acc_3_3; + const __m256d elMatDiag_4 = q_acc_4_4; + const __m256d elMatDiag_5 = q_acc_5_5; + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 
2)) / (2))],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 
2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 
/ (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 
1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0; + const 
real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q]; + const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_8 = tmp_qloop_7*2.0; + const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0; + const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0; + const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2; + const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2; + const real_t tmp_qloop_15 = abs_det_jac_affine_BLUE*(cp_times_delta_dof_0*tmp_qloop_12 + cp_times_delta_dof_1*tmp_qloop_6 + cp_times_delta_dof_2*tmp_qloop_9 + cp_times_delta_dof_3*tmp_qloop_3 + cp_times_delta_dof_4*tmp_qloop_10 + cp_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q]; + const real_t tmp_qloop_16 = tmp_qloop_0 - 1.0; + const real_t tmp_qloop_17 = tmp_qloop_1 - 1.0; + const real_t tmp_qloop_18 = jac_affine_inv_1_0_BLUE*tmp_qloop_0; + const real_t tmp_qloop_19 = jac_affine_inv_0_0_BLUE*tmp_qloop_1; + const real_t tmp_qloop_20 = jac_affine_inv_1_1_BLUE*tmp_qloop_0; + const real_t tmp_qloop_21 = jac_affine_inv_0_1_BLUE*tmp_qloop_1; + const real_t tmp_qloop_22 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_23 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0; + const real_t q_tmp_0_0 = tmp_qloop_15*((tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_2 + jac_affine_inv_1_0_BLUE*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_2 + 
jac_affine_inv_1_1_BLUE*tmp_qloop_2))*(tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_2 + jac_affine_inv_1_0_BLUE*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_2 + jac_affine_inv_1_1_BLUE*tmp_qloop_2))); + const real_t q_tmp_1_1 = tmp_qloop_15*((jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_16 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_16)*(jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_16 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_16)); + const real_t q_tmp_2_2 = tmp_qloop_15*((jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_17)*(jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_17)); + const real_t q_tmp_3_3 = tmp_qloop_15*((tmp_qloop_13*(tmp_qloop_18 + tmp_qloop_19) + tmp_qloop_14*(tmp_qloop_20 + tmp_qloop_21))*(tmp_qloop_13*(tmp_qloop_18 + tmp_qloop_19) + tmp_qloop_14*(tmp_qloop_20 + tmp_qloop_21))); + const real_t q_tmp_4_4 = tmp_qloop_15*((tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_22 - tmp_qloop_19) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_22 - tmp_qloop_21))*(tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_22 - tmp_qloop_19) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_22 - tmp_qloop_21))); + const real_t q_tmp_5_5 = tmp_qloop_15*((tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_23 - tmp_qloop_18) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_23 - tmp_qloop_20))*(tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_23 - tmp_qloop_18) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_23 - tmp_qloop_20))); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatDiag_0 = q_acc_0_0; + const real_t elMatDiag_1 = q_acc_1_1; + const real_t elMatDiag_2 = q_acc_2_2; + const real_t elMatDiag_3 = q_acc_3_3; + const real_t elMatDiag_4 = 
q_acc_4_4; + const real_t elMatDiag_5 = q_acc_5_5; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + } + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/supg_advection/noarch/P2ElementwiseSupgAdvectionAnnulusMap_apply_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp 
b/operators/supg_advection/noarch/P2ElementwiseSupgAdvectionAnnulusMap_apply_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2abb673f69cb6c5adc082bf1a18d5bb8cfb5d590 --- /dev/null +++ b/operators/supg_advection/noarch/P2ElementwiseSupgAdvectionAnnulusMap_apply_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp @@ -0,0 +1,687 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. 
+*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseSupgAdvectionAnnulusMap.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseSupgAdvectionAnnulusMap::apply_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D( real_t * RESTRICT _data_cp_times_deltaEdge, real_t * RESTRICT _data_cp_times_deltaVertex, real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + 
macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; + const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_8 = -tmp_qloop_7; + const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); + const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; + const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_11*1.0; + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t 
_data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / 
(micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const 
real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const 
real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const 
real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0; + const real_t tmp_qloop_40 = jac_affine_inv_0_0_GRAY*tmp_qloop_39 + jac_affine_inv_1_0_GRAY*tmp_qloop_39; + const real_t tmp_qloop_41 = jac_affine_inv_0_1_GRAY*tmp_qloop_39 + jac_affine_inv_1_1_GRAY*tmp_qloop_39; + const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q]; + const real_t 
tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_44 = tmp_qloop_43*2.0; + const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q]; + const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_47 = tmp_qloop_46*2.0; + const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q]; + const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0; + const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0; + const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0; + const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0; + const real_t tmp_qloop_56 = tmp_qloop_37 - 1.0; + const real_t tmp_qloop_57 = jac_affine_inv_0_0_GRAY*tmp_qloop_56; + const real_t tmp_qloop_58 = jac_affine_inv_0_1_GRAY*tmp_qloop_56; + const real_t tmp_qloop_62 = tmp_qloop_38 - 1.0; + const real_t tmp_qloop_63 = jac_affine_inv_1_0_GRAY*tmp_qloop_62; + const real_t tmp_qloop_64 = jac_affine_inv_1_1_GRAY*tmp_qloop_62; + const real_t tmp_qloop_67 = jac_affine_inv_1_0_GRAY*tmp_qloop_37; + const real_t tmp_qloop_68 = jac_affine_inv_0_0_GRAY*tmp_qloop_38; + const real_t tmp_qloop_69 = tmp_qloop_67 + tmp_qloop_68; + const real_t tmp_qloop_70 = jac_affine_inv_1_1_GRAY*tmp_qloop_37; + const real_t tmp_qloop_71 = jac_affine_inv_0_1_GRAY*tmp_qloop_38; + const real_t tmp_qloop_72 = tmp_qloop_70 + tmp_qloop_71; + const real_t tmp_qloop_75 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_76 = jac_affine_inv_1_0_GRAY*tmp_qloop_75 - tmp_qloop_68; + const real_t tmp_qloop_77 = jac_affine_inv_1_1_GRAY*tmp_qloop_75 - tmp_qloop_71; + const real_t tmp_qloop_80 = -tmp_qloop_38 
- 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_81 = jac_affine_inv_0_0_GRAY*tmp_qloop_80 - tmp_qloop_67; + const real_t tmp_qloop_82 = jac_affine_inv_0_1_GRAY*tmp_qloop_80 - tmp_qloop_70; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_55 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(cp_times_delta_dof_0*tmp_qloop_51 + cp_times_delta_dof_1*tmp_qloop_45 + cp_times_delta_dof_2*tmp_qloop_48 + cp_times_delta_dof_3*tmp_qloop_42 + cp_times_delta_dof_4*tmp_qloop_49 + cp_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q]; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t tmp_qloop_54 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41); + const real_t tmp_qloop_59 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_57 + jac_blending_inv_1_0*tmp_qloop_58) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_57 + jac_blending_inv_1_1*tmp_qloop_58); + const real_t tmp_qloop_60 = tmp_qloop_54*tmp_qloop_55; + const real_t tmp_qloop_61 = tmp_qloop_59*tmp_qloop_60; + const real_t tmp_qloop_65 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + 
jac_blending_inv_1_0*tmp_qloop_64) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_64); + const real_t tmp_qloop_66 = tmp_qloop_60*tmp_qloop_65; + const real_t tmp_qloop_73 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_69 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_69 + jac_blending_inv_1_1*tmp_qloop_72); + const real_t tmp_qloop_74 = tmp_qloop_60*tmp_qloop_73; + const real_t tmp_qloop_78 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_76 + jac_blending_inv_1_0*tmp_qloop_77) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_76 + jac_blending_inv_1_1*tmp_qloop_77); + const real_t tmp_qloop_79 = tmp_qloop_60*tmp_qloop_78; + const real_t tmp_qloop_83 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_81 + jac_blending_inv_1_0*tmp_qloop_82) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_81 + jac_blending_inv_1_1*tmp_qloop_82); + const real_t tmp_qloop_84 = tmp_qloop_60*tmp_qloop_83; + const real_t tmp_qloop_85 = tmp_qloop_55*tmp_qloop_59; + const real_t tmp_qloop_86 = tmp_qloop_65*tmp_qloop_85; + const real_t tmp_qloop_87 = tmp_qloop_73*tmp_qloop_85; + const real_t tmp_qloop_88 = tmp_qloop_78*tmp_qloop_85; + const real_t tmp_qloop_89 = tmp_qloop_83*tmp_qloop_85; + const real_t tmp_qloop_90 = tmp_qloop_55*tmp_qloop_65; + const real_t tmp_qloop_91 = tmp_qloop_73*tmp_qloop_90; + const real_t tmp_qloop_92 = tmp_qloop_78*tmp_qloop_90; + const real_t tmp_qloop_93 = tmp_qloop_83*tmp_qloop_90; + const real_t tmp_qloop_94 = tmp_qloop_55*tmp_qloop_73; + const real_t tmp_qloop_95 = tmp_qloop_78*tmp_qloop_94; + const real_t tmp_qloop_96 = tmp_qloop_83*tmp_qloop_94; + const real_t tmp_qloop_97 = tmp_qloop_55*tmp_qloop_78*tmp_qloop_83; + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = 
tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t q_tmp_0_0 = (tmp_qloop_54*tmp_qloop_54)*tmp_qloop_55; + const real_t q_tmp_0_1 = tmp_qloop_61; + const real_t q_tmp_0_2 = tmp_qloop_66; + const real_t q_tmp_0_3 = tmp_qloop_74; + const real_t q_tmp_0_4 = tmp_qloop_79; + const real_t q_tmp_0_5 = tmp_qloop_84; + const real_t q_tmp_1_0 = tmp_qloop_61; + const real_t q_tmp_1_1 = tmp_qloop_55*(tmp_qloop_59*tmp_qloop_59); + const real_t q_tmp_1_2 = tmp_qloop_86; + const real_t q_tmp_1_3 = tmp_qloop_87; + const real_t q_tmp_1_4 = tmp_qloop_88; + const real_t q_tmp_1_5 = tmp_qloop_89; + const real_t q_tmp_2_0 = tmp_qloop_66; + const real_t q_tmp_2_1 = tmp_qloop_86; + const real_t q_tmp_2_2 = tmp_qloop_55*(tmp_qloop_65*tmp_qloop_65); + const real_t q_tmp_2_3 = tmp_qloop_91; + const real_t q_tmp_2_4 = tmp_qloop_92; + const real_t q_tmp_2_5 = tmp_qloop_93; + const real_t q_tmp_3_0 = tmp_qloop_74; + const real_t q_tmp_3_1 = tmp_qloop_87; + const real_t q_tmp_3_2 = tmp_qloop_91; + const real_t q_tmp_3_3 = tmp_qloop_55*(tmp_qloop_73*tmp_qloop_73); + const real_t q_tmp_3_4 = tmp_qloop_95; + const real_t q_tmp_3_5 = tmp_qloop_96; + const real_t q_tmp_4_0 = tmp_qloop_79; + const real_t q_tmp_4_1 = tmp_qloop_88; + 
const real_t q_tmp_4_2 = tmp_qloop_92; + const real_t q_tmp_4_3 = tmp_qloop_95; + const real_t q_tmp_4_4 = tmp_qloop_55*(tmp_qloop_78*tmp_qloop_78); + const real_t q_tmp_4_5 = tmp_qloop_97; + const real_t q_tmp_5_0 = tmp_qloop_84; + const real_t q_tmp_5_1 = tmp_qloop_89; + const real_t q_tmp_5_2 = tmp_qloop_93; + const real_t q_tmp_5_3 = tmp_qloop_96; + const real_t q_tmp_5_4 = tmp_qloop_97; + const real_t q_tmp_5_5 = tmp_qloop_55*(tmp_qloop_83*tmp_qloop_83); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; + const real_t elMatVec_1 = 
q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5; + const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5; + const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t 
tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + 
macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_2 = 
_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 
1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + 
real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const 
real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0; + const real_t tmp_qloop_40 = jac_affine_inv_0_0_BLUE*tmp_qloop_39 + jac_affine_inv_1_0_BLUE*tmp_qloop_39; + const real_t tmp_qloop_41 = jac_affine_inv_0_1_BLUE*tmp_qloop_39 + jac_affine_inv_1_1_BLUE*tmp_qloop_39; + const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_44 = tmp_qloop_43*2.0; + const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q]; + const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_47 = tmp_qloop_46*2.0; + const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q]; + const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0; + const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0; + const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0; + const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0; + const real_t tmp_qloop_56 = tmp_qloop_37 - 1.0; + const real_t tmp_qloop_57 = jac_affine_inv_0_0_BLUE*tmp_qloop_56; + const real_t tmp_qloop_58 = 
jac_affine_inv_0_1_BLUE*tmp_qloop_56; + const real_t tmp_qloop_62 = tmp_qloop_38 - 1.0; + const real_t tmp_qloop_63 = jac_affine_inv_1_0_BLUE*tmp_qloop_62; + const real_t tmp_qloop_64 = jac_affine_inv_1_1_BLUE*tmp_qloop_62; + const real_t tmp_qloop_67 = jac_affine_inv_1_0_BLUE*tmp_qloop_37; + const real_t tmp_qloop_68 = jac_affine_inv_0_0_BLUE*tmp_qloop_38; + const real_t tmp_qloop_69 = tmp_qloop_67 + tmp_qloop_68; + const real_t tmp_qloop_70 = jac_affine_inv_1_1_BLUE*tmp_qloop_37; + const real_t tmp_qloop_71 = jac_affine_inv_0_1_BLUE*tmp_qloop_38; + const real_t tmp_qloop_72 = tmp_qloop_70 + tmp_qloop_71; + const real_t tmp_qloop_75 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_76 = jac_affine_inv_1_0_BLUE*tmp_qloop_75 - tmp_qloop_68; + const real_t tmp_qloop_77 = jac_affine_inv_1_1_BLUE*tmp_qloop_75 - tmp_qloop_71; + const real_t tmp_qloop_80 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_81 = jac_affine_inv_0_0_BLUE*tmp_qloop_80 - tmp_qloop_67; + const real_t tmp_qloop_82 = jac_affine_inv_0_1_BLUE*tmp_qloop_80 - tmp_qloop_70; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_55 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(cp_times_delta_dof_0*tmp_qloop_51 + cp_times_delta_dof_1*tmp_qloop_45 + cp_times_delta_dof_2*tmp_qloop_48 + cp_times_delta_dof_3*tmp_qloop_42 + cp_times_delta_dof_4*tmp_qloop_49 + cp_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q]; + const 
real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t tmp_qloop_54 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41); + const real_t tmp_qloop_59 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_57 + jac_blending_inv_1_0*tmp_qloop_58) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_57 + jac_blending_inv_1_1*tmp_qloop_58); + const real_t tmp_qloop_60 = tmp_qloop_54*tmp_qloop_55; + const real_t tmp_qloop_61 = tmp_qloop_59*tmp_qloop_60; + const real_t tmp_qloop_65 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_64) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_64); + const real_t tmp_qloop_66 = tmp_qloop_60*tmp_qloop_65; + const real_t tmp_qloop_73 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_69 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_69 + jac_blending_inv_1_1*tmp_qloop_72); + const real_t tmp_qloop_74 = tmp_qloop_60*tmp_qloop_73; + const real_t tmp_qloop_78 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_76 + jac_blending_inv_1_0*tmp_qloop_77) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_76 + jac_blending_inv_1_1*tmp_qloop_77); + const real_t tmp_qloop_79 = tmp_qloop_60*tmp_qloop_78; + const real_t tmp_qloop_83 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_81 + jac_blending_inv_1_0*tmp_qloop_82) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_81 + jac_blending_inv_1_1*tmp_qloop_82); + const real_t tmp_qloop_84 = tmp_qloop_60*tmp_qloop_83; + const real_t tmp_qloop_85 = tmp_qloop_55*tmp_qloop_59; + const real_t tmp_qloop_86 = tmp_qloop_65*tmp_qloop_85; + const real_t tmp_qloop_87 = 
tmp_qloop_73*tmp_qloop_85; + const real_t tmp_qloop_88 = tmp_qloop_78*tmp_qloop_85; + const real_t tmp_qloop_89 = tmp_qloop_83*tmp_qloop_85; + const real_t tmp_qloop_90 = tmp_qloop_55*tmp_qloop_65; + const real_t tmp_qloop_91 = tmp_qloop_73*tmp_qloop_90; + const real_t tmp_qloop_92 = tmp_qloop_78*tmp_qloop_90; + const real_t tmp_qloop_93 = tmp_qloop_83*tmp_qloop_90; + const real_t tmp_qloop_94 = tmp_qloop_55*tmp_qloop_73; + const real_t tmp_qloop_95 = tmp_qloop_78*tmp_qloop_94; + const real_t tmp_qloop_96 = tmp_qloop_83*tmp_qloop_94; + const real_t tmp_qloop_97 = tmp_qloop_55*tmp_qloop_78*tmp_qloop_83; + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t q_tmp_0_0 = (tmp_qloop_54*tmp_qloop_54)*tmp_qloop_55; + const real_t q_tmp_0_1 = tmp_qloop_61; + const real_t q_tmp_0_2 = tmp_qloop_66; + const real_t q_tmp_0_3 = tmp_qloop_74; + const real_t q_tmp_0_4 = tmp_qloop_79; + const real_t q_tmp_0_5 = tmp_qloop_84; + const 
real_t q_tmp_1_0 = tmp_qloop_61; + const real_t q_tmp_1_1 = tmp_qloop_55*(tmp_qloop_59*tmp_qloop_59); + const real_t q_tmp_1_2 = tmp_qloop_86; + const real_t q_tmp_1_3 = tmp_qloop_87; + const real_t q_tmp_1_4 = tmp_qloop_88; + const real_t q_tmp_1_5 = tmp_qloop_89; + const real_t q_tmp_2_0 = tmp_qloop_66; + const real_t q_tmp_2_1 = tmp_qloop_86; + const real_t q_tmp_2_2 = tmp_qloop_55*(tmp_qloop_65*tmp_qloop_65); + const real_t q_tmp_2_3 = tmp_qloop_91; + const real_t q_tmp_2_4 = tmp_qloop_92; + const real_t q_tmp_2_5 = tmp_qloop_93; + const real_t q_tmp_3_0 = tmp_qloop_74; + const real_t q_tmp_3_1 = tmp_qloop_87; + const real_t q_tmp_3_2 = tmp_qloop_91; + const real_t q_tmp_3_3 = tmp_qloop_55*(tmp_qloop_73*tmp_qloop_73); + const real_t q_tmp_3_4 = tmp_qloop_95; + const real_t q_tmp_3_5 = tmp_qloop_96; + const real_t q_tmp_4_0 = tmp_qloop_79; + const real_t q_tmp_4_1 = tmp_qloop_88; + const real_t q_tmp_4_2 = tmp_qloop_92; + const real_t q_tmp_4_3 = tmp_qloop_95; + const real_t q_tmp_4_4 = tmp_qloop_55*(tmp_qloop_78*tmp_qloop_78); + const real_t q_tmp_4_5 = tmp_qloop_97; + const real_t q_tmp_5_0 = tmp_qloop_84; + const real_t q_tmp_5_1 = tmp_qloop_89; + const real_t q_tmp_5_2 = tmp_qloop_93; + const real_t q_tmp_5_3 = tmp_qloop_96; + const real_t q_tmp_5_4 = tmp_qloop_97; + const real_t q_tmp_5_5 = tmp_qloop_55*(tmp_qloop_83*tmp_qloop_83); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + 
q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5; + const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5; + const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + 
_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/supg_advection/noarch/P2ElementwiseSupgAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp b/operators/supg_advection/noarch/P2ElementwiseSupgAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..339901c4448e984cce6a8f9686e7742fbf606d20 --- /dev/null +++ b/operators/supg_advection/noarch/P2ElementwiseSupgAdvectionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp @@ -0,0 +1,445 @@ +/* +* Copyright 
(c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseSupgAdvectionAnnulusMap.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseSupgAdvectionAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D( real_t * RESTRICT _data_cp_times_deltaEdge, real_t * RESTRICT _data_cp_times_deltaVertex, real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t 
thrVertex_0, real_t thrVertex_1 ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t 
jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; + const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_8 = -tmp_qloop_7; + const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); + const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; + const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_11*1.0; + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + 
macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / 
(2))]; + const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 
1]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = 
tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0; + const real_t tmp_qloop_40 = jac_affine_inv_0_0_GRAY*tmp_qloop_39 + jac_affine_inv_1_0_GRAY*tmp_qloop_39; + const real_t tmp_qloop_41 = jac_affine_inv_0_1_GRAY*tmp_qloop_39 + jac_affine_inv_1_1_GRAY*tmp_qloop_39; + const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_44 = tmp_qloop_43*2.0; + const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q]; + const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_47 = tmp_qloop_46*2.0; + const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q]; + const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0; + const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0; + const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + 
tmp_qloop_51*ux_dof_0; + const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0; + const real_t tmp_qloop_55 = tmp_qloop_37 - 1.0; + const real_t tmp_qloop_56 = jac_affine_inv_0_0_GRAY*tmp_qloop_55; + const real_t tmp_qloop_57 = jac_affine_inv_0_1_GRAY*tmp_qloop_55; + const real_t tmp_qloop_58 = tmp_qloop_38 - 1.0; + const real_t tmp_qloop_59 = jac_affine_inv_1_0_GRAY*tmp_qloop_58; + const real_t tmp_qloop_60 = jac_affine_inv_1_1_GRAY*tmp_qloop_58; + const real_t tmp_qloop_61 = jac_affine_inv_1_0_GRAY*tmp_qloop_37; + const real_t tmp_qloop_62 = jac_affine_inv_0_0_GRAY*tmp_qloop_38; + const real_t tmp_qloop_63 = tmp_qloop_61 + tmp_qloop_62; + const real_t tmp_qloop_64 = jac_affine_inv_1_1_GRAY*tmp_qloop_37; + const real_t tmp_qloop_65 = jac_affine_inv_0_1_GRAY*tmp_qloop_38; + const real_t tmp_qloop_66 = tmp_qloop_64 + tmp_qloop_65; + const real_t tmp_qloop_67 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_68 = jac_affine_inv_1_0_GRAY*tmp_qloop_67 - tmp_qloop_62; + const real_t tmp_qloop_69 = jac_affine_inv_1_1_GRAY*tmp_qloop_67 - tmp_qloop_65; + const real_t tmp_qloop_70 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_71 = jac_affine_inv_0_0_GRAY*tmp_qloop_70 - tmp_qloop_61; + const real_t tmp_qloop_72 = jac_affine_inv_0_1_GRAY*tmp_qloop_70 - tmp_qloop_64; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t 
abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_54 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(cp_times_delta_dof_0*tmp_qloop_51 + cp_times_delta_dof_1*tmp_qloop_45 + cp_times_delta_dof_2*tmp_qloop_48 + cp_times_delta_dof_3*tmp_qloop_42 + cp_times_delta_dof_4*tmp_qloop_49 + cp_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q]; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t q_tmp_0_0 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + 
jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41))); + const real_t q_tmp_1_1 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_1_0*tmp_qloop_57) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_1_0*tmp_qloop_57) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57))); + const real_t q_tmp_2_2 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_59 + jac_blending_inv_1_0*tmp_qloop_60) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_59 + jac_blending_inv_1_1*tmp_qloop_60))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_59 + jac_blending_inv_1_0*tmp_qloop_60) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_59 + jac_blending_inv_1_1*tmp_qloop_60))); + const real_t q_tmp_3_3 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_66) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_66))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_66) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_66))); + const real_t q_tmp_4_4 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_1_0*tmp_qloop_69) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_1_0*tmp_qloop_69) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69))); + const real_t q_tmp_5_5 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_71 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_71 + jac_blending_inv_1_1*tmp_qloop_72))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_71 + jac_blending_inv_1_0*tmp_qloop_72) + 
tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_71 + jac_blending_inv_1_1*tmp_qloop_72))); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatDiag_0 = q_acc_0_0; + const real_t elMatDiag_1 = q_acc_1_1; + const real_t elMatDiag_2 = q_acc_2_2; + const real_t elMatDiag_3 = q_acc_3_3; + const real_t elMatDiag_4 = q_acc_4_4; + const real_t elMatDiag_5 = q_acc_5_5; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_invDiag_Edge[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t 
jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / 
(micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) 
/ (2)) + 1]; + const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - 
(((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; 
+ const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0; + const real_t tmp_qloop_40 = jac_affine_inv_0_0_BLUE*tmp_qloop_39 + jac_affine_inv_1_0_BLUE*tmp_qloop_39; + const real_t tmp_qloop_41 = jac_affine_inv_0_1_BLUE*tmp_qloop_39 + jac_affine_inv_1_1_BLUE*tmp_qloop_39; + const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_44 = tmp_qloop_43*2.0; + const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q]; + const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_47 = tmp_qloop_46*2.0; + const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q]; + const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0; + const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0; + const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0; + const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0; + const real_t tmp_qloop_55 = 
tmp_qloop_37 - 1.0; + const real_t tmp_qloop_56 = jac_affine_inv_0_0_BLUE*tmp_qloop_55; + const real_t tmp_qloop_57 = jac_affine_inv_0_1_BLUE*tmp_qloop_55; + const real_t tmp_qloop_58 = tmp_qloop_38 - 1.0; + const real_t tmp_qloop_59 = jac_affine_inv_1_0_BLUE*tmp_qloop_58; + const real_t tmp_qloop_60 = jac_affine_inv_1_1_BLUE*tmp_qloop_58; + const real_t tmp_qloop_61 = jac_affine_inv_1_0_BLUE*tmp_qloop_37; + const real_t tmp_qloop_62 = jac_affine_inv_0_0_BLUE*tmp_qloop_38; + const real_t tmp_qloop_63 = tmp_qloop_61 + tmp_qloop_62; + const real_t tmp_qloop_64 = jac_affine_inv_1_1_BLUE*tmp_qloop_37; + const real_t tmp_qloop_65 = jac_affine_inv_0_1_BLUE*tmp_qloop_38; + const real_t tmp_qloop_66 = tmp_qloop_64 + tmp_qloop_65; + const real_t tmp_qloop_67 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_68 = jac_affine_inv_1_0_BLUE*tmp_qloop_67 - tmp_qloop_62; + const real_t tmp_qloop_69 = jac_affine_inv_1_1_BLUE*tmp_qloop_67 - tmp_qloop_65; + const real_t tmp_qloop_70 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_71 = jac_affine_inv_0_0_BLUE*tmp_qloop_70 - tmp_qloop_61; + const real_t tmp_qloop_72 = jac_affine_inv_0_1_BLUE*tmp_qloop_70 - tmp_qloop_64; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_54 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(cp_times_delta_dof_0*tmp_qloop_51 + cp_times_delta_dof_1*tmp_qloop_45 + cp_times_delta_dof_2*tmp_qloop_48 + 
cp_times_delta_dof_3*tmp_qloop_42 + cp_times_delta_dof_4*tmp_qloop_49 + cp_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q]; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t q_tmp_0_0 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41))); + const real_t q_tmp_1_1 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_56 + 
jac_blending_inv_1_0*tmp_qloop_57) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_1_0*tmp_qloop_57) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57))); + const real_t q_tmp_2_2 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_59 + jac_blending_inv_1_0*tmp_qloop_60) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_59 + jac_blending_inv_1_1*tmp_qloop_60))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_59 + jac_blending_inv_1_0*tmp_qloop_60) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_59 + jac_blending_inv_1_1*tmp_qloop_60))); + const real_t q_tmp_3_3 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_66) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_66))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_66) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_66))); + const real_t q_tmp_4_4 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_1_0*tmp_qloop_69) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_1_0*tmp_qloop_69) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69))); + const real_t q_tmp_5_5 = tmp_qloop_54*((tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_71 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_71 + jac_blending_inv_1_1*tmp_qloop_72))*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_71 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_71 + jac_blending_inv_1_1*tmp_qloop_72))); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_3_3 = q_acc_3_3 + 
q_tmp_3_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatDiag_0 = q_acc_0_0; + const real_t elMatDiag_1 = q_acc_1_1; + const real_t elMatDiag_2 = q_acc_2_2; + const real_t elMatDiag_3 = q_acc_3_3; + const real_t elMatDiag_4 = q_acc_4_4; + const real_t elMatDiag_5 = q_acc_5_5; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / 
(2))]; + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/supg_advection/noarch/P2ElementwiseSupgAdvectionAnnulusMap_toMatrix_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp b/operators/supg_advection/noarch/P2ElementwiseSupgAdvectionAnnulusMap_toMatrix_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bddd6dc76b537c69b4e9b51c8fb6035eb02baee6 --- /dev/null +++ b/operators/supg_advection/noarch/P2ElementwiseSupgAdvectionAnnulusMap_toMatrix_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D.cpp @@ -0,0 +1,845 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. 
+*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseSupgAdvectionAnnulusMap.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseSupgAdvectionAnnulusMap::toMatrix_P2ElementwiseSupgAdvectionAnnulusMap_macro_2D( real_t * RESTRICT _data_cp_times_deltaEdge, real_t * RESTRICT _data_cp_times_deltaVertex, idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + 
tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; + const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_8 = -tmp_qloop_7; + const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); + const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; + const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_11*1.0; + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const 
int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / 
(2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + 
real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = 
tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0; + const real_t tmp_qloop_40 = jac_affine_inv_0_0_GRAY*tmp_qloop_39 + jac_affine_inv_1_0_GRAY*tmp_qloop_39; + const real_t tmp_qloop_41 = jac_affine_inv_0_1_GRAY*tmp_qloop_39 + jac_affine_inv_1_1_GRAY*tmp_qloop_39; + const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_44 = tmp_qloop_43*2.0; + const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q]; + const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_47 = tmp_qloop_46*2.0; + const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q]; + const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0; + const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0; + const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0; + const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + 
tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0; + const real_t tmp_qloop_56 = tmp_qloop_37 - 1.0; + const real_t tmp_qloop_57 = jac_affine_inv_0_0_GRAY*tmp_qloop_56; + const real_t tmp_qloop_58 = jac_affine_inv_0_1_GRAY*tmp_qloop_56; + const real_t tmp_qloop_62 = tmp_qloop_38 - 1.0; + const real_t tmp_qloop_63 = jac_affine_inv_1_0_GRAY*tmp_qloop_62; + const real_t tmp_qloop_64 = jac_affine_inv_1_1_GRAY*tmp_qloop_62; + const real_t tmp_qloop_67 = jac_affine_inv_1_0_GRAY*tmp_qloop_37; + const real_t tmp_qloop_68 = jac_affine_inv_0_0_GRAY*tmp_qloop_38; + const real_t tmp_qloop_69 = tmp_qloop_67 + tmp_qloop_68; + const real_t tmp_qloop_70 = jac_affine_inv_1_1_GRAY*tmp_qloop_37; + const real_t tmp_qloop_71 = jac_affine_inv_0_1_GRAY*tmp_qloop_38; + const real_t tmp_qloop_72 = tmp_qloop_70 + tmp_qloop_71; + const real_t tmp_qloop_75 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_76 = jac_affine_inv_1_0_GRAY*tmp_qloop_75 - tmp_qloop_68; + const real_t tmp_qloop_77 = jac_affine_inv_1_1_GRAY*tmp_qloop_75 - tmp_qloop_71; + const real_t tmp_qloop_80 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_81 = jac_affine_inv_0_0_GRAY*tmp_qloop_80 - tmp_qloop_67; + const real_t tmp_qloop_82 = jac_affine_inv_0_1_GRAY*tmp_qloop_80 - tmp_qloop_70; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_55 = 
abs_det_jac_affine_GRAY*abs_det_jac_blending*(cp_times_delta_dof_0*tmp_qloop_51 + cp_times_delta_dof_1*tmp_qloop_45 + cp_times_delta_dof_2*tmp_qloop_48 + cp_times_delta_dof_3*tmp_qloop_42 + cp_times_delta_dof_4*tmp_qloop_49 + cp_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q]; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t tmp_qloop_54 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41); + const real_t tmp_qloop_59 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_57 + jac_blending_inv_1_0*tmp_qloop_58) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_57 + jac_blending_inv_1_1*tmp_qloop_58); + const real_t tmp_qloop_60 = tmp_qloop_54*tmp_qloop_55; + const real_t tmp_qloop_61 = tmp_qloop_59*tmp_qloop_60; + const real_t tmp_qloop_65 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_64) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_64); + const real_t tmp_qloop_66 = tmp_qloop_60*tmp_qloop_65; + const real_t tmp_qloop_73 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_69 + jac_blending_inv_1_0*tmp_qloop_72) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_69 + jac_blending_inv_1_1*tmp_qloop_72); + const real_t tmp_qloop_74 = tmp_qloop_60*tmp_qloop_73; + const real_t tmp_qloop_78 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_76 + jac_blending_inv_1_0*tmp_qloop_77) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_76 + jac_blending_inv_1_1*tmp_qloop_77); + const real_t tmp_qloop_79 = tmp_qloop_60*tmp_qloop_78; + const real_t tmp_qloop_83 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_81 + jac_blending_inv_1_0*tmp_qloop_82) + 
tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_81 + jac_blending_inv_1_1*tmp_qloop_82); + const real_t tmp_qloop_84 = tmp_qloop_60*tmp_qloop_83; + const real_t tmp_qloop_85 = tmp_qloop_55*tmp_qloop_59; + const real_t tmp_qloop_86 = tmp_qloop_65*tmp_qloop_85; + const real_t tmp_qloop_87 = tmp_qloop_73*tmp_qloop_85; + const real_t tmp_qloop_88 = tmp_qloop_78*tmp_qloop_85; + const real_t tmp_qloop_89 = tmp_qloop_83*tmp_qloop_85; + const real_t tmp_qloop_90 = tmp_qloop_55*tmp_qloop_65; + const real_t tmp_qloop_91 = tmp_qloop_73*tmp_qloop_90; + const real_t tmp_qloop_92 = tmp_qloop_78*tmp_qloop_90; + const real_t tmp_qloop_93 = tmp_qloop_83*tmp_qloop_90; + const real_t tmp_qloop_94 = tmp_qloop_55*tmp_qloop_73; + const real_t tmp_qloop_95 = tmp_qloop_78*tmp_qloop_94; + const real_t tmp_qloop_96 = tmp_qloop_83*tmp_qloop_94; + const real_t tmp_qloop_97 = tmp_qloop_55*tmp_qloop_78*tmp_qloop_83; + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; 
+ const real_t q_tmp_0_0 = (tmp_qloop_54*tmp_qloop_54)*tmp_qloop_55; + const real_t q_tmp_0_1 = tmp_qloop_61; + const real_t q_tmp_0_2 = tmp_qloop_66; + const real_t q_tmp_0_3 = tmp_qloop_74; + const real_t q_tmp_0_4 = tmp_qloop_79; + const real_t q_tmp_0_5 = tmp_qloop_84; + const real_t q_tmp_1_0 = tmp_qloop_61; + const real_t q_tmp_1_1 = tmp_qloop_55*(tmp_qloop_59*tmp_qloop_59); + const real_t q_tmp_1_2 = tmp_qloop_86; + const real_t q_tmp_1_3 = tmp_qloop_87; + const real_t q_tmp_1_4 = tmp_qloop_88; + const real_t q_tmp_1_5 = tmp_qloop_89; + const real_t q_tmp_2_0 = tmp_qloop_66; + const real_t q_tmp_2_1 = tmp_qloop_86; + const real_t q_tmp_2_2 = tmp_qloop_55*(tmp_qloop_65*tmp_qloop_65); + const real_t q_tmp_2_3 = tmp_qloop_91; + const real_t q_tmp_2_4 = tmp_qloop_92; + const real_t q_tmp_2_5 = tmp_qloop_93; + const real_t q_tmp_3_0 = tmp_qloop_74; + const real_t q_tmp_3_1 = tmp_qloop_87; + const real_t q_tmp_3_2 = tmp_qloop_91; + const real_t q_tmp_3_3 = tmp_qloop_55*(tmp_qloop_73*tmp_qloop_73); + const real_t q_tmp_3_4 = tmp_qloop_95; + const real_t q_tmp_3_5 = tmp_qloop_96; + const real_t q_tmp_4_0 = tmp_qloop_79; + const real_t q_tmp_4_1 = tmp_qloop_88; + const real_t q_tmp_4_2 = tmp_qloop_92; + const real_t q_tmp_4_3 = tmp_qloop_95; + const real_t q_tmp_4_4 = tmp_qloop_55*(tmp_qloop_78*tmp_qloop_78); + const real_t q_tmp_4_5 = tmp_qloop_97; + const real_t q_tmp_5_0 = tmp_qloop_84; + const real_t q_tmp_5_1 = tmp_qloop_89; + const real_t q_tmp_5_2 = tmp_qloop_93; + const real_t q_tmp_5_3 = tmp_qloop_96; + const real_t q_tmp_5_4 = tmp_qloop_97; + const real_t q_tmp_5_5 = tmp_qloop_55*(tmp_qloop_83*tmp_qloop_83); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = 
q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMat_0_0 = q_acc_0_0; + const real_t elMat_0_1 = q_acc_0_1; + const real_t elMat_0_2 = q_acc_0_2; + const real_t elMat_0_3 = q_acc_0_3; + const real_t elMat_0_4 = q_acc_0_4; + const real_t elMat_0_5 = q_acc_0_5; + const real_t elMat_1_0 = q_acc_1_0; + const real_t elMat_1_1 = q_acc_1_1; + const real_t elMat_1_2 = q_acc_1_2; + const real_t elMat_1_3 = q_acc_1_3; + const real_t elMat_1_4 = q_acc_1_4; + const real_t elMat_1_5 = q_acc_1_5; + const real_t elMat_2_0 = q_acc_2_0; + const real_t elMat_2_1 = q_acc_2_1; + const real_t elMat_2_2 = q_acc_2_2; + const real_t elMat_2_3 = q_acc_2_3; + const real_t elMat_2_4 = q_acc_2_4; + const real_t elMat_2_5 = q_acc_2_5; + const real_t elMat_3_0 = q_acc_3_0; + const real_t elMat_3_1 = q_acc_3_1; + const real_t elMat_3_2 = q_acc_3_2; + const real_t elMat_3_3 = q_acc_3_3; + const real_t elMat_3_4 = q_acc_3_4; + const real_t elMat_3_5 = q_acc_3_5; + const real_t elMat_4_0 = q_acc_4_0; + const real_t elMat_4_1 = q_acc_4_1; + const real_t 
elMat_4_2 = q_acc_4_2; + const real_t elMat_4_3 = q_acc_4_3; + const real_t elMat_4_4 = q_acc_4_4; + const real_t elMat_4_5 = q_acc_4_5; + const real_t elMat_5_0 = q_acc_5_0; + const real_t elMat_5_1 = q_acc_5_1; + const real_t elMat_5_2 = q_acc_5_2; + const real_t elMat_5_3 = q_acc_5_3; + const real_t elMat_5_4 = q_acc_5_4; + const real_t elMat_5_5 = q_acc_5_5; + + std::vector< uint_t > _data_rowIdx( 6 ); + std::vector< uint_t > _data_colIdx( 6 ); + std::vector< real_t > _data_mat( 36 ); + + _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])); + _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])); + _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])); + _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])); + _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])); + _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 
((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])); + + /* Apply basis transformation */ + + + + _data_mat[0] = ((real_t)(elMat_0_0)); + _data_mat[1] = ((real_t)(elMat_0_1)); + _data_mat[2] = ((real_t)(elMat_0_2)); + _data_mat[3] = ((real_t)(elMat_0_3)); + _data_mat[4] = ((real_t)(elMat_0_4)); + _data_mat[5] = ((real_t)(elMat_0_5)); + _data_mat[6] = ((real_t)(elMat_1_0)); + _data_mat[7] = ((real_t)(elMat_1_1)); + _data_mat[8] = ((real_t)(elMat_1_2)); + _data_mat[9] = ((real_t)(elMat_1_3)); + _data_mat[10] = ((real_t)(elMat_1_4)); + _data_mat[11] = ((real_t)(elMat_1_5)); + _data_mat[12] = ((real_t)(elMat_2_0)); + _data_mat[13] = ((real_t)(elMat_2_1)); + _data_mat[14] = ((real_t)(elMat_2_2)); + _data_mat[15] = ((real_t)(elMat_2_3)); + _data_mat[16] = ((real_t)(elMat_2_4)); + _data_mat[17] = ((real_t)(elMat_2_5)); + _data_mat[18] = ((real_t)(elMat_3_0)); + _data_mat[19] = ((real_t)(elMat_3_1)); + _data_mat[20] = ((real_t)(elMat_3_2)); + _data_mat[21] = ((real_t)(elMat_3_3)); + _data_mat[22] = ((real_t)(elMat_3_4)); + _data_mat[23] = ((real_t)(elMat_3_5)); + _data_mat[24] = ((real_t)(elMat_4_0)); + _data_mat[25] = ((real_t)(elMat_4_1)); + _data_mat[26] = ((real_t)(elMat_4_2)); + _data_mat[27] = ((real_t)(elMat_4_3)); + _data_mat[28] = ((real_t)(elMat_4_4)); + _data_mat[29] = ((real_t)(elMat_4_5)); + _data_mat[30] = ((real_t)(elMat_5_0)); + _data_mat[31] = ((real_t)(elMat_5_1)); + _data_mat[32] = ((real_t)(elMat_5_2)); + _data_mat[33] = ((real_t)(elMat_5_3)); + _data_mat[34] = ((real_t)(elMat_5_4)); + _data_mat[35] = ((real_t)(elMat_5_5)); + + + mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); + } + } + const real_t 
tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = 
jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + 
macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_4 = 
_data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 
2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + 
tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0; + const real_t tmp_qloop_40 = jac_affine_inv_0_0_BLUE*tmp_qloop_39 + jac_affine_inv_1_0_BLUE*tmp_qloop_39; + const real_t tmp_qloop_41 = jac_affine_inv_0_1_BLUE*tmp_qloop_39 + jac_affine_inv_1_1_BLUE*tmp_qloop_39; + const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_44 = tmp_qloop_43*2.0; + const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q]; + const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_47 = tmp_qloop_46*2.0; + const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q]; + const real_t 
tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0; + const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0; + const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_52 = tmp_qloop_42*ux_dof_3 + tmp_qloop_45*ux_dof_1 + tmp_qloop_48*ux_dof_2 + tmp_qloop_49*ux_dof_4 + tmp_qloop_50*ux_dof_5 + tmp_qloop_51*ux_dof_0; + const real_t tmp_qloop_53 = tmp_qloop_42*uy_dof_3 + tmp_qloop_45*uy_dof_1 + tmp_qloop_48*uy_dof_2 + tmp_qloop_49*uy_dof_4 + tmp_qloop_50*uy_dof_5 + tmp_qloop_51*uy_dof_0; + const real_t tmp_qloop_56 = tmp_qloop_37 - 1.0; + const real_t tmp_qloop_57 = jac_affine_inv_0_0_BLUE*tmp_qloop_56; + const real_t tmp_qloop_58 = jac_affine_inv_0_1_BLUE*tmp_qloop_56; + const real_t tmp_qloop_62 = tmp_qloop_38 - 1.0; + const real_t tmp_qloop_63 = jac_affine_inv_1_0_BLUE*tmp_qloop_62; + const real_t tmp_qloop_64 = jac_affine_inv_1_1_BLUE*tmp_qloop_62; + const real_t tmp_qloop_67 = jac_affine_inv_1_0_BLUE*tmp_qloop_37; + const real_t tmp_qloop_68 = jac_affine_inv_0_0_BLUE*tmp_qloop_38; + const real_t tmp_qloop_69 = tmp_qloop_67 + tmp_qloop_68; + const real_t tmp_qloop_70 = jac_affine_inv_1_1_BLUE*tmp_qloop_37; + const real_t tmp_qloop_71 = jac_affine_inv_0_1_BLUE*tmp_qloop_38; + const real_t tmp_qloop_72 = tmp_qloop_70 + tmp_qloop_71; + const real_t tmp_qloop_75 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_76 = jac_affine_inv_1_0_BLUE*tmp_qloop_75 - tmp_qloop_68; + const real_t tmp_qloop_77 = jac_affine_inv_1_1_BLUE*tmp_qloop_75 - tmp_qloop_71; + const real_t tmp_qloop_80 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_81 = jac_affine_inv_0_0_BLUE*tmp_qloop_80 - tmp_qloop_67; + const real_t tmp_qloop_82 = jac_affine_inv_0_1_BLUE*tmp_qloop_80 - tmp_qloop_70; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = 
-tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_55 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(cp_times_delta_dof_0*tmp_qloop_51 + cp_times_delta_dof_1*tmp_qloop_45 + cp_times_delta_dof_2*tmp_qloop_48 + cp_times_delta_dof_3*tmp_qloop_42 + cp_times_delta_dof_4*tmp_qloop_49 + cp_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q]; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t tmp_qloop_54 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41); + const real_t tmp_qloop_59 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_57 + jac_blending_inv_1_0*tmp_qloop_58) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_57 + jac_blending_inv_1_1*tmp_qloop_58); + const real_t tmp_qloop_60 = tmp_qloop_54*tmp_qloop_55; + const real_t tmp_qloop_61 = tmp_qloop_59*tmp_qloop_60; + const real_t tmp_qloop_65 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_63 + jac_blending_inv_1_0*tmp_qloop_64) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_63 + jac_blending_inv_1_1*tmp_qloop_64); + const real_t tmp_qloop_66 = tmp_qloop_60*tmp_qloop_65; + const real_t tmp_qloop_73 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_69 + jac_blending_inv_1_0*tmp_qloop_72) + 
tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_69 + jac_blending_inv_1_1*tmp_qloop_72); + const real_t tmp_qloop_74 = tmp_qloop_60*tmp_qloop_73; + const real_t tmp_qloop_78 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_76 + jac_blending_inv_1_0*tmp_qloop_77) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_76 + jac_blending_inv_1_1*tmp_qloop_77); + const real_t tmp_qloop_79 = tmp_qloop_60*tmp_qloop_78; + const real_t tmp_qloop_83 = tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_81 + jac_blending_inv_1_0*tmp_qloop_82) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_81 + jac_blending_inv_1_1*tmp_qloop_82); + const real_t tmp_qloop_84 = tmp_qloop_60*tmp_qloop_83; + const real_t tmp_qloop_85 = tmp_qloop_55*tmp_qloop_59; + const real_t tmp_qloop_86 = tmp_qloop_65*tmp_qloop_85; + const real_t tmp_qloop_87 = tmp_qloop_73*tmp_qloop_85; + const real_t tmp_qloop_88 = tmp_qloop_78*tmp_qloop_85; + const real_t tmp_qloop_89 = tmp_qloop_83*tmp_qloop_85; + const real_t tmp_qloop_90 = tmp_qloop_55*tmp_qloop_65; + const real_t tmp_qloop_91 = tmp_qloop_73*tmp_qloop_90; + const real_t tmp_qloop_92 = tmp_qloop_78*tmp_qloop_90; + const real_t tmp_qloop_93 = tmp_qloop_83*tmp_qloop_90; + const real_t tmp_qloop_94 = tmp_qloop_55*tmp_qloop_73; + const real_t tmp_qloop_95 = tmp_qloop_78*tmp_qloop_94; + const real_t tmp_qloop_96 = tmp_qloop_83*tmp_qloop_94; + const real_t tmp_qloop_97 = tmp_qloop_55*tmp_qloop_78*tmp_qloop_83; + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = 
tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t q_tmp_0_0 = (tmp_qloop_54*tmp_qloop_54)*tmp_qloop_55; + const real_t q_tmp_0_1 = tmp_qloop_61; + const real_t q_tmp_0_2 = tmp_qloop_66; + const real_t q_tmp_0_3 = tmp_qloop_74; + const real_t q_tmp_0_4 = tmp_qloop_79; + const real_t q_tmp_0_5 = tmp_qloop_84; + const real_t q_tmp_1_0 = tmp_qloop_61; + const real_t q_tmp_1_1 = tmp_qloop_55*(tmp_qloop_59*tmp_qloop_59); + const real_t q_tmp_1_2 = tmp_qloop_86; + const real_t q_tmp_1_3 = tmp_qloop_87; + const real_t q_tmp_1_4 = tmp_qloop_88; + const real_t q_tmp_1_5 = tmp_qloop_89; + const real_t q_tmp_2_0 = tmp_qloop_66; + const real_t q_tmp_2_1 = tmp_qloop_86; + const real_t q_tmp_2_2 = tmp_qloop_55*(tmp_qloop_65*tmp_qloop_65); + const real_t q_tmp_2_3 = tmp_qloop_91; + const real_t q_tmp_2_4 = tmp_qloop_92; + const real_t q_tmp_2_5 = tmp_qloop_93; + const real_t q_tmp_3_0 = tmp_qloop_74; + const real_t q_tmp_3_1 = tmp_qloop_87; + const real_t q_tmp_3_2 = tmp_qloop_91; + const real_t q_tmp_3_3 = tmp_qloop_55*(tmp_qloop_73*tmp_qloop_73); + const real_t q_tmp_3_4 = tmp_qloop_95; + const real_t q_tmp_3_5 = tmp_qloop_96; + const real_t q_tmp_4_0 = tmp_qloop_79; + const real_t q_tmp_4_1 = tmp_qloop_88; + const real_t q_tmp_4_2 = tmp_qloop_92; + const real_t q_tmp_4_3 = tmp_qloop_95; + const real_t q_tmp_4_4 = tmp_qloop_55*(tmp_qloop_78*tmp_qloop_78); + const real_t q_tmp_4_5 = tmp_qloop_97; + const real_t q_tmp_5_0 = tmp_qloop_84; + const real_t 
q_tmp_5_1 = tmp_qloop_89; + const real_t q_tmp_5_2 = tmp_qloop_93; + const real_t q_tmp_5_3 = tmp_qloop_96; + const real_t q_tmp_5_4 = tmp_qloop_97; + const real_t q_tmp_5_5 = tmp_qloop_55*(tmp_qloop_83*tmp_qloop_83); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMat_0_0 = q_acc_0_0; + const real_t elMat_0_1 = q_acc_0_1; + const real_t elMat_0_2 = q_acc_0_2; + const real_t elMat_0_3 = q_acc_0_3; + const real_t elMat_0_4 = q_acc_0_4; + const real_t elMat_0_5 = q_acc_0_5; + const real_t elMat_1_0 = q_acc_1_0; + const real_t elMat_1_1 = q_acc_1_1; + const real_t elMat_1_2 = q_acc_1_2; + const real_t elMat_1_3 = q_acc_1_3; + const real_t elMat_1_4 = q_acc_1_4; + const real_t elMat_1_5 = 
q_acc_1_5; + const real_t elMat_2_0 = q_acc_2_0; + const real_t elMat_2_1 = q_acc_2_1; + const real_t elMat_2_2 = q_acc_2_2; + const real_t elMat_2_3 = q_acc_2_3; + const real_t elMat_2_4 = q_acc_2_4; + const real_t elMat_2_5 = q_acc_2_5; + const real_t elMat_3_0 = q_acc_3_0; + const real_t elMat_3_1 = q_acc_3_1; + const real_t elMat_3_2 = q_acc_3_2; + const real_t elMat_3_3 = q_acc_3_3; + const real_t elMat_3_4 = q_acc_3_4; + const real_t elMat_3_5 = q_acc_3_5; + const real_t elMat_4_0 = q_acc_4_0; + const real_t elMat_4_1 = q_acc_4_1; + const real_t elMat_4_2 = q_acc_4_2; + const real_t elMat_4_3 = q_acc_4_3; + const real_t elMat_4_4 = q_acc_4_4; + const real_t elMat_4_5 = q_acc_4_5; + const real_t elMat_5_0 = q_acc_5_0; + const real_t elMat_5_1 = q_acc_5_1; + const real_t elMat_5_2 = q_acc_5_2; + const real_t elMat_5_3 = q_acc_5_3; + const real_t elMat_5_4 = q_acc_5_4; + const real_t elMat_5_5 = q_acc_5_5; + + std::vector< uint_t > _data_rowIdx( 6 ); + std::vector< uint_t > _data_colIdx( 6 ); + std::vector< real_t > _data_mat( 36 ); + + _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])); + _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])); + _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])); + _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge 
+ 1)) / (2))])); + _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])); + _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])); + _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])); + _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + + /* Apply basis transformation */ + + + + _data_mat[0] = ((real_t)(elMat_0_0)); + _data_mat[1] = ((real_t)(elMat_0_1)); + _data_mat[2] = ((real_t)(elMat_0_2)); + _data_mat[3] = ((real_t)(elMat_0_3)); + _data_mat[4] = ((real_t)(elMat_0_4)); + _data_mat[5] = ((real_t)(elMat_0_5)); + _data_mat[6] = ((real_t)(elMat_1_0)); + _data_mat[7] = ((real_t)(elMat_1_1)); + _data_mat[8] = ((real_t)(elMat_1_2)); + _data_mat[9] = ((real_t)(elMat_1_3)); + _data_mat[10] = ((real_t)(elMat_1_4)); + _data_mat[11] = ((real_t)(elMat_1_5)); + _data_mat[12] = ((real_t)(elMat_2_0)); + _data_mat[13] = ((real_t)(elMat_2_1)); + _data_mat[14] = ((real_t)(elMat_2_2)); + _data_mat[15] = ((real_t)(elMat_2_3)); + _data_mat[16] = ((real_t)(elMat_2_4)); + _data_mat[17] = ((real_t)(elMat_2_5)); + _data_mat[18] = ((real_t)(elMat_3_0)); + _data_mat[19] = ((real_t)(elMat_3_1)); + _data_mat[20] = ((real_t)(elMat_3_2)); + _data_mat[21] = ((real_t)(elMat_3_3)); + _data_mat[22] = ((real_t)(elMat_3_4)); + _data_mat[23] = ((real_t)(elMat_3_5)); + 
_data_mat[24] = ((real_t)(elMat_4_0)); + _data_mat[25] = ((real_t)(elMat_4_1)); + _data_mat[26] = ((real_t)(elMat_4_2)); + _data_mat[27] = ((real_t)(elMat_4_3)); + _data_mat[28] = ((real_t)(elMat_4_4)); + _data_mat[29] = ((real_t)(elMat_4_5)); + _data_mat[30] = ((real_t)(elMat_5_0)); + _data_mat[31] = ((real_t)(elMat_5_1)); + _data_mat[32] = ((real_t)(elMat_5_2)); + _data_mat[33] = ((real_t)(elMat_5_3)); + _data_mat[34] = ((real_t)(elMat_5_4)); + _data_mat[35] = ((real_t)(elMat_5_5)); + + + mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/supg_advection/noarch/P2ElementwiseSupgAdvection_apply_P2ElementwiseSupgAdvection_macro_2D.cpp b/operators/supg_advection/noarch/P2ElementwiseSupgAdvection_apply_P2ElementwiseSupgAdvection_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e14daf120eec7a2467cae9dd111d2833585b6c6d --- /dev/null +++ b/operators/supg_advection/noarch/P2ElementwiseSupgAdvection_apply_P2ElementwiseSupgAdvection_macro_2D.cpp @@ -0,0 +1,560 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. 
+* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseSupgAdvection.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseSupgAdvection::apply_P2ElementwiseSupgAdvection_macro_2D( real_t * RESTRICT _data_cp_times_deltaEdge, real_t * RESTRICT _data_cp_times_deltaVertex, real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + 
tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t 
p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = 
macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0; + const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q]; + const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_8 = tmp_qloop_7*2.0; + const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0; + const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0; + const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + 
tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2; + const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2; + const real_t tmp_qloop_15 = tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_0_GRAY*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_2); + const real_t tmp_qloop_16 = abs_det_jac_affine_GRAY*(cp_times_delta_dof_0*tmp_qloop_12 + cp_times_delta_dof_1*tmp_qloop_6 + cp_times_delta_dof_2*tmp_qloop_9 + cp_times_delta_dof_3*tmp_qloop_3 + cp_times_delta_dof_4*tmp_qloop_10 + cp_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q]; + const real_t tmp_qloop_17 = tmp_qloop_0 - 1.0; + const real_t tmp_qloop_18 = jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_17; + const real_t tmp_qloop_19 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_20 = tmp_qloop_18*tmp_qloop_19; + const real_t tmp_qloop_21 = tmp_qloop_1 - 1.0; + const real_t tmp_qloop_22 = jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_21 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_21; + const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_22; + const real_t tmp_qloop_24 = jac_affine_inv_1_0_GRAY*tmp_qloop_0; + const real_t tmp_qloop_25 = jac_affine_inv_0_0_GRAY*tmp_qloop_1; + const real_t tmp_qloop_26 = jac_affine_inv_1_1_GRAY*tmp_qloop_0; + const real_t tmp_qloop_27 = jac_affine_inv_0_1_GRAY*tmp_qloop_1; + const real_t tmp_qloop_28 = tmp_qloop_13*(tmp_qloop_24 + tmp_qloop_25) + tmp_qloop_14*(tmp_qloop_26 + tmp_qloop_27); + const real_t tmp_qloop_29 = tmp_qloop_19*tmp_qloop_28; + const real_t tmp_qloop_30 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_31 = tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_30 - tmp_qloop_25) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_30 - tmp_qloop_27); + const real_t tmp_qloop_32 = tmp_qloop_19*tmp_qloop_31; + const real_t 
tmp_qloop_33 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_34 = tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_33 - tmp_qloop_24) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_33 - tmp_qloop_26); + const real_t tmp_qloop_35 = tmp_qloop_19*tmp_qloop_34; + const real_t tmp_qloop_36 = tmp_qloop_16*tmp_qloop_18; + const real_t tmp_qloop_37 = tmp_qloop_22*tmp_qloop_36; + const real_t tmp_qloop_38 = tmp_qloop_28*tmp_qloop_36; + const real_t tmp_qloop_39 = tmp_qloop_31*tmp_qloop_36; + const real_t tmp_qloop_40 = tmp_qloop_34*tmp_qloop_36; + const real_t tmp_qloop_41 = tmp_qloop_16*tmp_qloop_22; + const real_t tmp_qloop_42 = tmp_qloop_28*tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_31*tmp_qloop_41; + const real_t tmp_qloop_44 = tmp_qloop_34*tmp_qloop_41; + const real_t tmp_qloop_45 = tmp_qloop_16*tmp_qloop_28; + const real_t tmp_qloop_46 = tmp_qloop_31*tmp_qloop_45; + const real_t tmp_qloop_47 = tmp_qloop_34*tmp_qloop_45; + const real_t tmp_qloop_48 = tmp_qloop_16*tmp_qloop_31*tmp_qloop_34; + const real_t q_tmp_0_0 = (tmp_qloop_15*tmp_qloop_15)*tmp_qloop_16; + const real_t q_tmp_0_1 = tmp_qloop_20; + const real_t q_tmp_0_2 = tmp_qloop_23; + const real_t q_tmp_0_3 = tmp_qloop_29; + const real_t q_tmp_0_4 = tmp_qloop_32; + const real_t q_tmp_0_5 = tmp_qloop_35; + const real_t q_tmp_1_0 = tmp_qloop_20; + const real_t q_tmp_1_1 = tmp_qloop_16*(tmp_qloop_18*tmp_qloop_18); + const real_t q_tmp_1_2 = tmp_qloop_37; + const real_t q_tmp_1_3 = tmp_qloop_38; + const real_t q_tmp_1_4 = tmp_qloop_39; + const real_t q_tmp_1_5 = tmp_qloop_40; + const real_t q_tmp_2_0 = tmp_qloop_23; + const real_t q_tmp_2_1 = tmp_qloop_37; + const real_t q_tmp_2_2 = tmp_qloop_16*(tmp_qloop_22*tmp_qloop_22); + const real_t q_tmp_2_3 = tmp_qloop_42; + const real_t q_tmp_2_4 = tmp_qloop_43; + const real_t q_tmp_2_5 = tmp_qloop_44; + const real_t q_tmp_3_0 = tmp_qloop_29; + const real_t q_tmp_3_1 = tmp_qloop_38; + const real_t q_tmp_3_2 = tmp_qloop_42; + const real_t 
q_tmp_3_3 = tmp_qloop_16*(tmp_qloop_28*tmp_qloop_28); + const real_t q_tmp_3_4 = tmp_qloop_46; + const real_t q_tmp_3_5 = tmp_qloop_47; + const real_t q_tmp_4_0 = tmp_qloop_32; + const real_t q_tmp_4_1 = tmp_qloop_39; + const real_t q_tmp_4_2 = tmp_qloop_43; + const real_t q_tmp_4_3 = tmp_qloop_46; + const real_t q_tmp_4_4 = tmp_qloop_16*(tmp_qloop_31*tmp_qloop_31); + const real_t q_tmp_4_5 = tmp_qloop_48; + const real_t q_tmp_5_0 = tmp_qloop_35; + const real_t q_tmp_5_1 = tmp_qloop_40; + const real_t q_tmp_5_2 = tmp_qloop_44; + const real_t q_tmp_5_3 = tmp_qloop_47; + const real_t q_tmp_5_4 = tmp_qloop_48; + const real_t q_tmp_5_5 = tmp_qloop_16*(tmp_qloop_34*tmp_qloop_34); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = 
q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5; + const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5; + const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_dstEdge[ctr_0 
+ ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = 
-p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + 
macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / 
(2)) + 1]; + const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t 
ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t 
q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0; + const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q]; + const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_8 = tmp_qloop_7*2.0; + const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0; + const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0; + const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2; + const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2; + const real_t tmp_qloop_15 = tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_2 + jac_affine_inv_1_0_BLUE*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_2 + 
jac_affine_inv_1_1_BLUE*tmp_qloop_2); + const real_t tmp_qloop_16 = abs_det_jac_affine_BLUE*(cp_times_delta_dof_0*tmp_qloop_12 + cp_times_delta_dof_1*tmp_qloop_6 + cp_times_delta_dof_2*tmp_qloop_9 + cp_times_delta_dof_3*tmp_qloop_3 + cp_times_delta_dof_4*tmp_qloop_10 + cp_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q]; + const real_t tmp_qloop_17 = tmp_qloop_0 - 1.0; + const real_t tmp_qloop_18 = jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_17; + const real_t tmp_qloop_19 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_20 = tmp_qloop_18*tmp_qloop_19; + const real_t tmp_qloop_21 = tmp_qloop_1 - 1.0; + const real_t tmp_qloop_22 = jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_21 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_21; + const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_22; + const real_t tmp_qloop_24 = jac_affine_inv_1_0_BLUE*tmp_qloop_0; + const real_t tmp_qloop_25 = jac_affine_inv_0_0_BLUE*tmp_qloop_1; + const real_t tmp_qloop_26 = jac_affine_inv_1_1_BLUE*tmp_qloop_0; + const real_t tmp_qloop_27 = jac_affine_inv_0_1_BLUE*tmp_qloop_1; + const real_t tmp_qloop_28 = tmp_qloop_13*(tmp_qloop_24 + tmp_qloop_25) + tmp_qloop_14*(tmp_qloop_26 + tmp_qloop_27); + const real_t tmp_qloop_29 = tmp_qloop_19*tmp_qloop_28; + const real_t tmp_qloop_30 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_31 = tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_30 - tmp_qloop_25) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_30 - tmp_qloop_27); + const real_t tmp_qloop_32 = tmp_qloop_19*tmp_qloop_31; + const real_t tmp_qloop_33 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_34 = tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_33 - tmp_qloop_24) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_33 - tmp_qloop_26); + const real_t tmp_qloop_35 = tmp_qloop_19*tmp_qloop_34; + const real_t tmp_qloop_36 = tmp_qloop_16*tmp_qloop_18; + const real_t tmp_qloop_37 = 
tmp_qloop_22*tmp_qloop_36; + const real_t tmp_qloop_38 = tmp_qloop_28*tmp_qloop_36; + const real_t tmp_qloop_39 = tmp_qloop_31*tmp_qloop_36; + const real_t tmp_qloop_40 = tmp_qloop_34*tmp_qloop_36; + const real_t tmp_qloop_41 = tmp_qloop_16*tmp_qloop_22; + const real_t tmp_qloop_42 = tmp_qloop_28*tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_31*tmp_qloop_41; + const real_t tmp_qloop_44 = tmp_qloop_34*tmp_qloop_41; + const real_t tmp_qloop_45 = tmp_qloop_16*tmp_qloop_28; + const real_t tmp_qloop_46 = tmp_qloop_31*tmp_qloop_45; + const real_t tmp_qloop_47 = tmp_qloop_34*tmp_qloop_45; + const real_t tmp_qloop_48 = tmp_qloop_16*tmp_qloop_31*tmp_qloop_34; + const real_t q_tmp_0_0 = (tmp_qloop_15*tmp_qloop_15)*tmp_qloop_16; + const real_t q_tmp_0_1 = tmp_qloop_20; + const real_t q_tmp_0_2 = tmp_qloop_23; + const real_t q_tmp_0_3 = tmp_qloop_29; + const real_t q_tmp_0_4 = tmp_qloop_32; + const real_t q_tmp_0_5 = tmp_qloop_35; + const real_t q_tmp_1_0 = tmp_qloop_20; + const real_t q_tmp_1_1 = tmp_qloop_16*(tmp_qloop_18*tmp_qloop_18); + const real_t q_tmp_1_2 = tmp_qloop_37; + const real_t q_tmp_1_3 = tmp_qloop_38; + const real_t q_tmp_1_4 = tmp_qloop_39; + const real_t q_tmp_1_5 = tmp_qloop_40; + const real_t q_tmp_2_0 = tmp_qloop_23; + const real_t q_tmp_2_1 = tmp_qloop_37; + const real_t q_tmp_2_2 = tmp_qloop_16*(tmp_qloop_22*tmp_qloop_22); + const real_t q_tmp_2_3 = tmp_qloop_42; + const real_t q_tmp_2_4 = tmp_qloop_43; + const real_t q_tmp_2_5 = tmp_qloop_44; + const real_t q_tmp_3_0 = tmp_qloop_29; + const real_t q_tmp_3_1 = tmp_qloop_38; + const real_t q_tmp_3_2 = tmp_qloop_42; + const real_t q_tmp_3_3 = tmp_qloop_16*(tmp_qloop_28*tmp_qloop_28); + const real_t q_tmp_3_4 = tmp_qloop_46; + const real_t q_tmp_3_5 = tmp_qloop_47; + const real_t q_tmp_4_0 = tmp_qloop_32; + const real_t q_tmp_4_1 = tmp_qloop_39; + const real_t q_tmp_4_2 = tmp_qloop_43; + const real_t q_tmp_4_3 = tmp_qloop_46; + const real_t q_tmp_4_4 = tmp_qloop_16*(tmp_qloop_31*tmp_qloop_31); + 
const real_t q_tmp_4_5 = tmp_qloop_48; + const real_t q_tmp_5_0 = tmp_qloop_35; + const real_t q_tmp_5_1 = tmp_qloop_40; + const real_t q_tmp_5_2 = tmp_qloop_44; + const real_t q_tmp_5_3 = tmp_qloop_47; + const real_t q_tmp_5_4 = tmp_qloop_48; + const real_t q_tmp_5_5 = tmp_qloop_16*(tmp_qloop_34*tmp_qloop_34); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5; + const real_t elMatVec_2 = 
q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5; + const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5; + const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 
((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/supg_advection/noarch/P2ElementwiseSupgAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvection_macro_2D.cpp b/operators/supg_advection/noarch/P2ElementwiseSupgAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvection_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..09067a59c87202aef200b743fae247f2cf4813bb --- /dev/null +++ b/operators/supg_advection/noarch/P2ElementwiseSupgAdvection_computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvection_macro_2D.cpp @@ -0,0 +1,318 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. 
+*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseSupgAdvection.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseSupgAdvection::computeInverseDiagonalOperatorValues_P2ElementwiseSupgAdvection_macro_2D( real_t * RESTRICT _data_cp_times_deltaEdge, real_t * RESTRICT _data_cp_times_deltaVertex, real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = 
macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + 
macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + 
macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 
2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0; + const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q]; + const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_8 = tmp_qloop_7*2.0; + const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0; + const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0; + const real_t tmp_qloop_12 = 
tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2; + const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2; + const real_t tmp_qloop_15 = abs_det_jac_affine_GRAY*(cp_times_delta_dof_0*tmp_qloop_12 + cp_times_delta_dof_1*tmp_qloop_6 + cp_times_delta_dof_2*tmp_qloop_9 + cp_times_delta_dof_3*tmp_qloop_3 + cp_times_delta_dof_4*tmp_qloop_10 + cp_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q]; + const real_t tmp_qloop_16 = tmp_qloop_0 - 1.0; + const real_t tmp_qloop_17 = tmp_qloop_1 - 1.0; + const real_t tmp_qloop_18 = jac_affine_inv_1_0_GRAY*tmp_qloop_0; + const real_t tmp_qloop_19 = jac_affine_inv_0_0_GRAY*tmp_qloop_1; + const real_t tmp_qloop_20 = jac_affine_inv_1_1_GRAY*tmp_qloop_0; + const real_t tmp_qloop_21 = jac_affine_inv_0_1_GRAY*tmp_qloop_1; + const real_t tmp_qloop_22 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_23 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0; + const real_t q_tmp_0_0 = tmp_qloop_15*((tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_0_GRAY*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_2))*(tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_0_GRAY*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_2))); + const real_t q_tmp_1_1 = tmp_qloop_15*((jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_16 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_16)*(jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_16 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_16)); + const real_t q_tmp_2_2 = tmp_qloop_15*((jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_17 + 
jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_17)*(jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_17)); + const real_t q_tmp_3_3 = tmp_qloop_15*((tmp_qloop_13*(tmp_qloop_18 + tmp_qloop_19) + tmp_qloop_14*(tmp_qloop_20 + tmp_qloop_21))*(tmp_qloop_13*(tmp_qloop_18 + tmp_qloop_19) + tmp_qloop_14*(tmp_qloop_20 + tmp_qloop_21))); + const real_t q_tmp_4_4 = tmp_qloop_15*((tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_22 - tmp_qloop_19) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_22 - tmp_qloop_21))*(tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_22 - tmp_qloop_19) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_22 - tmp_qloop_21))); + const real_t q_tmp_5_5 = tmp_qloop_15*((tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_23 - tmp_qloop_18) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_23 - tmp_qloop_20))*(tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_23 - tmp_qloop_18) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_23 - tmp_qloop_20))); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatDiag_0 = q_acc_0_0; + const real_t elMatDiag_1 = q_acc_1_1; + const real_t elMatDiag_2 = q_acc_2_2; + const real_t elMatDiag_3 = q_acc_3_3; + const real_t elMatDiag_4 = q_acc_4_4; + const real_t elMatDiag_5 = q_acc_5_5; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 
2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t 
p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + 
const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / 
(2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0; + const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q]; + const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_8 = tmp_qloop_7*2.0; + const 
real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0; + const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0; + const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2; + const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2; + const real_t tmp_qloop_15 = abs_det_jac_affine_BLUE*(cp_times_delta_dof_0*tmp_qloop_12 + cp_times_delta_dof_1*tmp_qloop_6 + cp_times_delta_dof_2*tmp_qloop_9 + cp_times_delta_dof_3*tmp_qloop_3 + cp_times_delta_dof_4*tmp_qloop_10 + cp_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q]; + const real_t tmp_qloop_16 = tmp_qloop_0 - 1.0; + const real_t tmp_qloop_17 = tmp_qloop_1 - 1.0; + const real_t tmp_qloop_18 = jac_affine_inv_1_0_BLUE*tmp_qloop_0; + const real_t tmp_qloop_19 = jac_affine_inv_0_0_BLUE*tmp_qloop_1; + const real_t tmp_qloop_20 = jac_affine_inv_1_1_BLUE*tmp_qloop_0; + const real_t tmp_qloop_21 = jac_affine_inv_0_1_BLUE*tmp_qloop_1; + const real_t tmp_qloop_22 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_23 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0; + const real_t q_tmp_0_0 = tmp_qloop_15*((tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_2 + jac_affine_inv_1_0_BLUE*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_2 + jac_affine_inv_1_1_BLUE*tmp_qloop_2))*(tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_2 + jac_affine_inv_1_0_BLUE*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_2 + jac_affine_inv_1_1_BLUE*tmp_qloop_2))); + const real_t q_tmp_1_1 = tmp_qloop_15*((jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_16 + 
jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_16)*(jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_16 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_16)); + const real_t q_tmp_2_2 = tmp_qloop_15*((jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_17)*(jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_17)); + const real_t q_tmp_3_3 = tmp_qloop_15*((tmp_qloop_13*(tmp_qloop_18 + tmp_qloop_19) + tmp_qloop_14*(tmp_qloop_20 + tmp_qloop_21))*(tmp_qloop_13*(tmp_qloop_18 + tmp_qloop_19) + tmp_qloop_14*(tmp_qloop_20 + tmp_qloop_21))); + const real_t q_tmp_4_4 = tmp_qloop_15*((tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_22 - tmp_qloop_19) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_22 - tmp_qloop_21))*(tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_22 - tmp_qloop_19) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_22 - tmp_qloop_21))); + const real_t q_tmp_5_5 = tmp_qloop_15*((tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_23 - tmp_qloop_18) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_23 - tmp_qloop_20))*(tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_23 - tmp_qloop_18) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_23 - tmp_qloop_20))); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatDiag_0 = q_acc_0_0; + const real_t elMatDiag_1 = q_acc_1_1; + const real_t elMatDiag_2 = q_acc_2_2; + const real_t elMatDiag_3 = q_acc_3_3; + const real_t elMatDiag_4 = q_acc_4_4; + const real_t elMatDiag_5 = q_acc_5_5; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 
1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/supg_advection/noarch/P2ElementwiseSupgAdvection_toMatrix_P2ElementwiseSupgAdvection_macro_2D.cpp b/operators/supg_advection/noarch/P2ElementwiseSupgAdvection_toMatrix_P2ElementwiseSupgAdvection_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d8e7c0365d13094bae5821f58975e181f1899c67 --- /dev/null +++ b/operators/supg_advection/noarch/P2ElementwiseSupgAdvection_toMatrix_P2ElementwiseSupgAdvection_macro_2D.cpp @@ -0,0 +1,718 @@ +/* +* Copyright (c) 
2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseSupgAdvection.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseSupgAdvection::toMatrix_P2ElementwiseSupgAdvection_macro_2D( real_t * RESTRICT _data_cp_times_deltaEdge, real_t * RESTRICT _data_cp_times_deltaVertex, idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_uxEdge, real_t * RESTRICT _data_uxVertex, real_t * RESTRICT _data_uyEdge, real_t * RESTRICT _data_uyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 
0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = 
jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + 
macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_times_delta_dof_4 = 
_data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 
+ ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0; + const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q]; + const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_8 = tmp_qloop_7*2.0; + const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0; + const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0; + const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + 
tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2; + const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2; + const real_t tmp_qloop_15 = tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_0_GRAY*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_2); + const real_t tmp_qloop_16 = abs_det_jac_affine_GRAY*(cp_times_delta_dof_0*tmp_qloop_12 + cp_times_delta_dof_1*tmp_qloop_6 + cp_times_delta_dof_2*tmp_qloop_9 + cp_times_delta_dof_3*tmp_qloop_3 + cp_times_delta_dof_4*tmp_qloop_10 + cp_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q]; + const real_t tmp_qloop_17 = tmp_qloop_0 - 1.0; + const real_t tmp_qloop_18 = jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_17; + const real_t tmp_qloop_19 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_20 = tmp_qloop_18*tmp_qloop_19; + const real_t tmp_qloop_21 = tmp_qloop_1 - 1.0; + const real_t tmp_qloop_22 = jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_21 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_21; + const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_22; + const real_t tmp_qloop_24 = jac_affine_inv_1_0_GRAY*tmp_qloop_0; + const real_t tmp_qloop_25 = jac_affine_inv_0_0_GRAY*tmp_qloop_1; + const real_t tmp_qloop_26 = jac_affine_inv_1_1_GRAY*tmp_qloop_0; + const real_t tmp_qloop_27 = jac_affine_inv_0_1_GRAY*tmp_qloop_1; + const real_t tmp_qloop_28 = tmp_qloop_13*(tmp_qloop_24 + tmp_qloop_25) + tmp_qloop_14*(tmp_qloop_26 + tmp_qloop_27); + const real_t tmp_qloop_29 = tmp_qloop_19*tmp_qloop_28; + const real_t tmp_qloop_30 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_31 = tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_30 - tmp_qloop_25) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_30 - tmp_qloop_27); + const real_t tmp_qloop_32 = tmp_qloop_19*tmp_qloop_31; + const real_t 
tmp_qloop_33 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_34 = tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_33 - tmp_qloop_24) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_33 - tmp_qloop_26); + const real_t tmp_qloop_35 = tmp_qloop_19*tmp_qloop_34; + const real_t tmp_qloop_36 = tmp_qloop_16*tmp_qloop_18; + const real_t tmp_qloop_37 = tmp_qloop_22*tmp_qloop_36; + const real_t tmp_qloop_38 = tmp_qloop_28*tmp_qloop_36; + const real_t tmp_qloop_39 = tmp_qloop_31*tmp_qloop_36; + const real_t tmp_qloop_40 = tmp_qloop_34*tmp_qloop_36; + const real_t tmp_qloop_41 = tmp_qloop_16*tmp_qloop_22; + const real_t tmp_qloop_42 = tmp_qloop_28*tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_31*tmp_qloop_41; + const real_t tmp_qloop_44 = tmp_qloop_34*tmp_qloop_41; + const real_t tmp_qloop_45 = tmp_qloop_16*tmp_qloop_28; + const real_t tmp_qloop_46 = tmp_qloop_31*tmp_qloop_45; + const real_t tmp_qloop_47 = tmp_qloop_34*tmp_qloop_45; + const real_t tmp_qloop_48 = tmp_qloop_16*tmp_qloop_31*tmp_qloop_34; + const real_t q_tmp_0_0 = (tmp_qloop_15*tmp_qloop_15)*tmp_qloop_16; + const real_t q_tmp_0_1 = tmp_qloop_20; + const real_t q_tmp_0_2 = tmp_qloop_23; + const real_t q_tmp_0_3 = tmp_qloop_29; + const real_t q_tmp_0_4 = tmp_qloop_32; + const real_t q_tmp_0_5 = tmp_qloop_35; + const real_t q_tmp_1_0 = tmp_qloop_20; + const real_t q_tmp_1_1 = tmp_qloop_16*(tmp_qloop_18*tmp_qloop_18); + const real_t q_tmp_1_2 = tmp_qloop_37; + const real_t q_tmp_1_3 = tmp_qloop_38; + const real_t q_tmp_1_4 = tmp_qloop_39; + const real_t q_tmp_1_5 = tmp_qloop_40; + const real_t q_tmp_2_0 = tmp_qloop_23; + const real_t q_tmp_2_1 = tmp_qloop_37; + const real_t q_tmp_2_2 = tmp_qloop_16*(tmp_qloop_22*tmp_qloop_22); + const real_t q_tmp_2_3 = tmp_qloop_42; + const real_t q_tmp_2_4 = tmp_qloop_43; + const real_t q_tmp_2_5 = tmp_qloop_44; + const real_t q_tmp_3_0 = tmp_qloop_29; + const real_t q_tmp_3_1 = tmp_qloop_38; + const real_t q_tmp_3_2 = tmp_qloop_42; + const real_t 
q_tmp_3_3 = tmp_qloop_16*(tmp_qloop_28*tmp_qloop_28); + const real_t q_tmp_3_4 = tmp_qloop_46; + const real_t q_tmp_3_5 = tmp_qloop_47; + const real_t q_tmp_4_0 = tmp_qloop_32; + const real_t q_tmp_4_1 = tmp_qloop_39; + const real_t q_tmp_4_2 = tmp_qloop_43; + const real_t q_tmp_4_3 = tmp_qloop_46; + const real_t q_tmp_4_4 = tmp_qloop_16*(tmp_qloop_31*tmp_qloop_31); + const real_t q_tmp_4_5 = tmp_qloop_48; + const real_t q_tmp_5_0 = tmp_qloop_35; + const real_t q_tmp_5_1 = tmp_qloop_40; + const real_t q_tmp_5_2 = tmp_qloop_44; + const real_t q_tmp_5_3 = tmp_qloop_47; + const real_t q_tmp_5_4 = tmp_qloop_48; + const real_t q_tmp_5_5 = tmp_qloop_16*(tmp_qloop_34*tmp_qloop_34); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = 
q_acc_5_5 + q_tmp_5_5; + } + const real_t elMat_0_0 = q_acc_0_0; + const real_t elMat_0_1 = q_acc_0_1; + const real_t elMat_0_2 = q_acc_0_2; + const real_t elMat_0_3 = q_acc_0_3; + const real_t elMat_0_4 = q_acc_0_4; + const real_t elMat_0_5 = q_acc_0_5; + const real_t elMat_1_0 = q_acc_1_0; + const real_t elMat_1_1 = q_acc_1_1; + const real_t elMat_1_2 = q_acc_1_2; + const real_t elMat_1_3 = q_acc_1_3; + const real_t elMat_1_4 = q_acc_1_4; + const real_t elMat_1_5 = q_acc_1_5; + const real_t elMat_2_0 = q_acc_2_0; + const real_t elMat_2_1 = q_acc_2_1; + const real_t elMat_2_2 = q_acc_2_2; + const real_t elMat_2_3 = q_acc_2_3; + const real_t elMat_2_4 = q_acc_2_4; + const real_t elMat_2_5 = q_acc_2_5; + const real_t elMat_3_0 = q_acc_3_0; + const real_t elMat_3_1 = q_acc_3_1; + const real_t elMat_3_2 = q_acc_3_2; + const real_t elMat_3_3 = q_acc_3_3; + const real_t elMat_3_4 = q_acc_3_4; + const real_t elMat_3_5 = q_acc_3_5; + const real_t elMat_4_0 = q_acc_4_0; + const real_t elMat_4_1 = q_acc_4_1; + const real_t elMat_4_2 = q_acc_4_2; + const real_t elMat_4_3 = q_acc_4_3; + const real_t elMat_4_4 = q_acc_4_4; + const real_t elMat_4_5 = q_acc_4_5; + const real_t elMat_5_0 = q_acc_5_0; + const real_t elMat_5_1 = q_acc_5_1; + const real_t elMat_5_2 = q_acc_5_2; + const real_t elMat_5_3 = q_acc_5_3; + const real_t elMat_5_4 = q_acc_5_4; + const real_t elMat_5_5 = q_acc_5_5; + + std::vector< uint_t > _data_rowIdx( 6 ); + std::vector< uint_t > _data_colIdx( 6 ); + std::vector< real_t > _data_mat( 36 ); + + _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])); + _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])); + _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])); + _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])); + _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])); + _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])); + + /* Apply basis transformation */ + + + + _data_mat[0] = ((real_t)(elMat_0_0)); + _data_mat[1] = ((real_t)(elMat_0_1)); + _data_mat[2] = ((real_t)(elMat_0_2)); + _data_mat[3] = ((real_t)(elMat_0_3)); + _data_mat[4] = ((real_t)(elMat_0_4)); + _data_mat[5] = ((real_t)(elMat_0_5)); + _data_mat[6] = ((real_t)(elMat_1_0)); + _data_mat[7] = ((real_t)(elMat_1_1)); + _data_mat[8] = ((real_t)(elMat_1_2)); + _data_mat[9] = ((real_t)(elMat_1_3)); + _data_mat[10] = ((real_t)(elMat_1_4)); + _data_mat[11] = ((real_t)(elMat_1_5)); + _data_mat[12] = ((real_t)(elMat_2_0)); + _data_mat[13] = 
((real_t)(elMat_2_1)); + _data_mat[14] = ((real_t)(elMat_2_2)); + _data_mat[15] = ((real_t)(elMat_2_3)); + _data_mat[16] = ((real_t)(elMat_2_4)); + _data_mat[17] = ((real_t)(elMat_2_5)); + _data_mat[18] = ((real_t)(elMat_3_0)); + _data_mat[19] = ((real_t)(elMat_3_1)); + _data_mat[20] = ((real_t)(elMat_3_2)); + _data_mat[21] = ((real_t)(elMat_3_3)); + _data_mat[22] = ((real_t)(elMat_3_4)); + _data_mat[23] = ((real_t)(elMat_3_5)); + _data_mat[24] = ((real_t)(elMat_4_0)); + _data_mat[25] = ((real_t)(elMat_4_1)); + _data_mat[26] = ((real_t)(elMat_4_2)); + _data_mat[27] = ((real_t)(elMat_4_3)); + _data_mat[28] = ((real_t)(elMat_4_4)); + _data_mat[29] = ((real_t)(elMat_4_5)); + _data_mat[30] = ((real_t)(elMat_5_0)); + _data_mat[31] = ((real_t)(elMat_5_1)); + _data_mat[32] = ((real_t)(elMat_5_2)); + _data_mat[33] = ((real_t)(elMat_5_3)); + _data_mat[34] = ((real_t)(elMat_5_4)); + _data_mat[35] = ((real_t)(elMat_5_5)); + + + mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE 
= tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / 
(micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 
+ 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t cp_times_delta_dof_0 = _data_cp_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_1 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_2 = _data_cp_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t cp_times_delta_dof_3 = _data_cp_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t cp_times_delta_dof_4 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t cp_times_delta_dof_5 = _data_cp_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t ux_dof_0 = _data_uxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t ux_dof_1 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_2 = _data_uxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t ux_dof_3 = _data_uxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t ux_dof_4 = _data_uxEdge[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t ux_dof_5 = _data_uxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t uy_dof_0 = _data_uyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t uy_dof_1 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_2 = _data_uyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t uy_dof_3 = _data_uyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t uy_dof_4 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t uy_dof_5 = _data_uyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 
0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0; + const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q]; + const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_8 = tmp_qloop_7*2.0; + const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0; + const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0; + const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_13 = tmp_qloop_10*ux_dof_4 + tmp_qloop_11*ux_dof_5 + tmp_qloop_12*ux_dof_0 + tmp_qloop_3*ux_dof_3 + tmp_qloop_6*ux_dof_1 + tmp_qloop_9*ux_dof_2; + const real_t tmp_qloop_14 = tmp_qloop_10*uy_dof_4 + tmp_qloop_11*uy_dof_5 + tmp_qloop_12*uy_dof_0 + tmp_qloop_3*uy_dof_3 + tmp_qloop_6*uy_dof_1 + tmp_qloop_9*uy_dof_2; + const real_t tmp_qloop_15 = tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_2 + jac_affine_inv_1_0_BLUE*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_2 + jac_affine_inv_1_1_BLUE*tmp_qloop_2); + const real_t tmp_qloop_16 = abs_det_jac_affine_BLUE*(cp_times_delta_dof_0*tmp_qloop_12 + cp_times_delta_dof_1*tmp_qloop_6 + cp_times_delta_dof_2*tmp_qloop_9 + cp_times_delta_dof_3*tmp_qloop_3 + cp_times_delta_dof_4*tmp_qloop_10 + cp_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q]; + const real_t tmp_qloop_17 = tmp_qloop_0 - 1.0; + const real_t tmp_qloop_18 = 
jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_17 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_17; + const real_t tmp_qloop_19 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_20 = tmp_qloop_18*tmp_qloop_19; + const real_t tmp_qloop_21 = tmp_qloop_1 - 1.0; + const real_t tmp_qloop_22 = jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_21 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_21; + const real_t tmp_qloop_23 = tmp_qloop_19*tmp_qloop_22; + const real_t tmp_qloop_24 = jac_affine_inv_1_0_BLUE*tmp_qloop_0; + const real_t tmp_qloop_25 = jac_affine_inv_0_0_BLUE*tmp_qloop_1; + const real_t tmp_qloop_26 = jac_affine_inv_1_1_BLUE*tmp_qloop_0; + const real_t tmp_qloop_27 = jac_affine_inv_0_1_BLUE*tmp_qloop_1; + const real_t tmp_qloop_28 = tmp_qloop_13*(tmp_qloop_24 + tmp_qloop_25) + tmp_qloop_14*(tmp_qloop_26 + tmp_qloop_27); + const real_t tmp_qloop_29 = tmp_qloop_19*tmp_qloop_28; + const real_t tmp_qloop_30 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_31 = tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_30 - tmp_qloop_25) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_30 - tmp_qloop_27); + const real_t tmp_qloop_32 = tmp_qloop_19*tmp_qloop_31; + const real_t tmp_qloop_33 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_34 = tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_33 - tmp_qloop_24) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_33 - tmp_qloop_26); + const real_t tmp_qloop_35 = tmp_qloop_19*tmp_qloop_34; + const real_t tmp_qloop_36 = tmp_qloop_16*tmp_qloop_18; + const real_t tmp_qloop_37 = tmp_qloop_22*tmp_qloop_36; + const real_t tmp_qloop_38 = tmp_qloop_28*tmp_qloop_36; + const real_t tmp_qloop_39 = tmp_qloop_31*tmp_qloop_36; + const real_t tmp_qloop_40 = tmp_qloop_34*tmp_qloop_36; + const real_t tmp_qloop_41 = tmp_qloop_16*tmp_qloop_22; + const real_t tmp_qloop_42 = tmp_qloop_28*tmp_qloop_41; + const real_t tmp_qloop_43 = tmp_qloop_31*tmp_qloop_41; + const real_t tmp_qloop_44 = 
tmp_qloop_34*tmp_qloop_41; + const real_t tmp_qloop_45 = tmp_qloop_16*tmp_qloop_28; + const real_t tmp_qloop_46 = tmp_qloop_31*tmp_qloop_45; + const real_t tmp_qloop_47 = tmp_qloop_34*tmp_qloop_45; + const real_t tmp_qloop_48 = tmp_qloop_16*tmp_qloop_31*tmp_qloop_34; + const real_t q_tmp_0_0 = (tmp_qloop_15*tmp_qloop_15)*tmp_qloop_16; + const real_t q_tmp_0_1 = tmp_qloop_20; + const real_t q_tmp_0_2 = tmp_qloop_23; + const real_t q_tmp_0_3 = tmp_qloop_29; + const real_t q_tmp_0_4 = tmp_qloop_32; + const real_t q_tmp_0_5 = tmp_qloop_35; + const real_t q_tmp_1_0 = tmp_qloop_20; + const real_t q_tmp_1_1 = tmp_qloop_16*(tmp_qloop_18*tmp_qloop_18); + const real_t q_tmp_1_2 = tmp_qloop_37; + const real_t q_tmp_1_3 = tmp_qloop_38; + const real_t q_tmp_1_4 = tmp_qloop_39; + const real_t q_tmp_1_5 = tmp_qloop_40; + const real_t q_tmp_2_0 = tmp_qloop_23; + const real_t q_tmp_2_1 = tmp_qloop_37; + const real_t q_tmp_2_2 = tmp_qloop_16*(tmp_qloop_22*tmp_qloop_22); + const real_t q_tmp_2_3 = tmp_qloop_42; + const real_t q_tmp_2_4 = tmp_qloop_43; + const real_t q_tmp_2_5 = tmp_qloop_44; + const real_t q_tmp_3_0 = tmp_qloop_29; + const real_t q_tmp_3_1 = tmp_qloop_38; + const real_t q_tmp_3_2 = tmp_qloop_42; + const real_t q_tmp_3_3 = tmp_qloop_16*(tmp_qloop_28*tmp_qloop_28); + const real_t q_tmp_3_4 = tmp_qloop_46; + const real_t q_tmp_3_5 = tmp_qloop_47; + const real_t q_tmp_4_0 = tmp_qloop_32; + const real_t q_tmp_4_1 = tmp_qloop_39; + const real_t q_tmp_4_2 = tmp_qloop_43; + const real_t q_tmp_4_3 = tmp_qloop_46; + const real_t q_tmp_4_4 = tmp_qloop_16*(tmp_qloop_31*tmp_qloop_31); + const real_t q_tmp_4_5 = tmp_qloop_48; + const real_t q_tmp_5_0 = tmp_qloop_35; + const real_t q_tmp_5_1 = tmp_qloop_40; + const real_t q_tmp_5_2 = tmp_qloop_44; + const real_t q_tmp_5_3 = tmp_qloop_47; + const real_t q_tmp_5_4 = tmp_qloop_48; + const real_t q_tmp_5_5 = tmp_qloop_16*(tmp_qloop_34*tmp_qloop_34); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = 
q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMat_0_0 = q_acc_0_0; + const real_t elMat_0_1 = q_acc_0_1; + const real_t elMat_0_2 = q_acc_0_2; + const real_t elMat_0_3 = q_acc_0_3; + const real_t elMat_0_4 = q_acc_0_4; + const real_t elMat_0_5 = q_acc_0_5; + const real_t elMat_1_0 = q_acc_1_0; + const real_t elMat_1_1 = q_acc_1_1; + const real_t elMat_1_2 = q_acc_1_2; + const real_t elMat_1_3 = q_acc_1_3; + const real_t elMat_1_4 = q_acc_1_4; + const real_t elMat_1_5 = q_acc_1_5; + const real_t elMat_2_0 = q_acc_2_0; + const real_t elMat_2_1 = q_acc_2_1; + const real_t elMat_2_2 = q_acc_2_2; + const real_t elMat_2_3 = q_acc_2_3; + const real_t elMat_2_4 = q_acc_2_4; + const real_t elMat_2_5 = q_acc_2_5; + const real_t elMat_3_0 = q_acc_3_0; + const real_t elMat_3_1 = 
q_acc_3_1; + const real_t elMat_3_2 = q_acc_3_2; + const real_t elMat_3_3 = q_acc_3_3; + const real_t elMat_3_4 = q_acc_3_4; + const real_t elMat_3_5 = q_acc_3_5; + const real_t elMat_4_0 = q_acc_4_0; + const real_t elMat_4_1 = q_acc_4_1; + const real_t elMat_4_2 = q_acc_4_2; + const real_t elMat_4_3 = q_acc_4_3; + const real_t elMat_4_4 = q_acc_4_4; + const real_t elMat_4_5 = q_acc_4_5; + const real_t elMat_5_0 = q_acc_5_0; + const real_t elMat_5_1 = q_acc_5_1; + const real_t elMat_5_2 = q_acc_5_2; + const real_t elMat_5_3 = q_acc_5_3; + const real_t elMat_5_4 = q_acc_5_4; + const real_t elMat_5_5 = q_acc_5_5; + + std::vector< uint_t > _data_rowIdx( 6 ); + std::vector< uint_t > _data_colIdx( 6 ); + std::vector< real_t > _data_mat( 36 ); + + _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])); + _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])); + _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])); + _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])); + _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + 
_data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])); + _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])); + _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + + /* Apply basis transformation */ + + + + _data_mat[0] = ((real_t)(elMat_0_0)); + _data_mat[1] = ((real_t)(elMat_0_1)); + _data_mat[2] = ((real_t)(elMat_0_2)); + _data_mat[3] = ((real_t)(elMat_0_3)); + _data_mat[4] = ((real_t)(elMat_0_4)); + _data_mat[5] = ((real_t)(elMat_0_5)); + _data_mat[6] = ((real_t)(elMat_1_0)); + _data_mat[7] = ((real_t)(elMat_1_1)); + _data_mat[8] = ((real_t)(elMat_1_2)); + _data_mat[9] = ((real_t)(elMat_1_3)); + _data_mat[10] = ((real_t)(elMat_1_4)); + _data_mat[11] = ((real_t)(elMat_1_5)); + _data_mat[12] = ((real_t)(elMat_2_0)); + _data_mat[13] = ((real_t)(elMat_2_1)); + _data_mat[14] = ((real_t)(elMat_2_2)); + _data_mat[15] = ((real_t)(elMat_2_3)); + _data_mat[16] = ((real_t)(elMat_2_4)); + _data_mat[17] = ((real_t)(elMat_2_5)); + _data_mat[18] = ((real_t)(elMat_3_0)); + _data_mat[19] = ((real_t)(elMat_3_1)); + _data_mat[20] = ((real_t)(elMat_3_2)); + _data_mat[21] = ((real_t)(elMat_3_3)); + _data_mat[22] = ((real_t)(elMat_3_4)); + _data_mat[23] = ((real_t)(elMat_3_5)); + _data_mat[24] = ((real_t)(elMat_4_0)); + _data_mat[25] = ((real_t)(elMat_4_1)); + _data_mat[26] = ((real_t)(elMat_4_2)); + _data_mat[27] = ((real_t)(elMat_4_3)); + _data_mat[28] = ((real_t)(elMat_4_4)); + _data_mat[29] = ((real_t)(elMat_4_5)); + _data_mat[30] = ((real_t)(elMat_5_0)); + 
_data_mat[31] = ((real_t)(elMat_5_1)); + _data_mat[32] = ((real_t)(elMat_5_2)); + _data_mat[33] = ((real_t)(elMat_5_3)); + _data_mat[34] = ((real_t)(elMat_5_4)); + _data_mat[35] = ((real_t)(elMat_5_5)); + + + mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/supg_diffusion/CMakeLists.txt b/operators/supg_diffusion/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..0196ac6f83fd8ec8652479f68d5edb9a022cfab2 --- /dev/null +++ b/operators/supg_diffusion/CMakeLists.txt @@ -0,0 +1,52 @@ +add_library( opgen-supg_diffusion + + P2ElementwiseSupgDiffusion.cpp + P2ElementwiseSupgDiffusion.hpp + P2ElementwiseSupgDiffusionAnnulusMap.cpp + P2ElementwiseSupgDiffusionAnnulusMap.hpp +) + +if(HYTEG_BUILD_WITH_AVX AND WALBERLA_DOUBLE_ACCURACY) + target_sources(opgen-supg_diffusion PRIVATE + + avx/P2ElementwiseSupgDiffusionAnnulusMap_apply_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp + avx/P2ElementwiseSupgDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp + avx/P2ElementwiseSupgDiffusion_apply_P2ElementwiseSupgDiffusion_macro_2D.cpp + avx/P2ElementwiseSupgDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusion_macro_2D.cpp + noarch/P2ElementwiseSupgDiffusionAnnulusMap_toMatrix_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseSupgDiffusion_toMatrix_P2ElementwiseSupgDiffusion_macro_2D.cpp + ) + + set_source_files_properties( + + avx/P2ElementwiseSupgDiffusionAnnulusMap_apply_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp + avx/P2ElementwiseSupgDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp + avx/P2ElementwiseSupgDiffusion_apply_P2ElementwiseSupgDiffusion_macro_2D.cpp + avx/P2ElementwiseSupgDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusion_macro_2D.cpp + + PROPERTIES 
COMPILE_OPTIONS ${HYTEG_COMPILER_NATIVE_FLAGS} + ) +else() + if(HYTEG_BUILD_WITH_AVX AND NOT WALBERLA_DOUBLE_ACCURACY) + message(WARNING "AVX vectorization only available in double precision. Using scalar kernels.") + endif() + + target_sources(opgen-supg_diffusion PRIVATE + + noarch/P2ElementwiseSupgDiffusionAnnulusMap_apply_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseSupgDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseSupgDiffusionAnnulusMap_toMatrix_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp + noarch/P2ElementwiseSupgDiffusion_apply_P2ElementwiseSupgDiffusion_macro_2D.cpp + noarch/P2ElementwiseSupgDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusion_macro_2D.cpp + noarch/P2ElementwiseSupgDiffusion_toMatrix_P2ElementwiseSupgDiffusion_macro_2D.cpp + ) +endif() + +if (HYTEG_BUILD_WITH_PETSC) + target_link_libraries(opgen-supg_diffusion PUBLIC PETSc::PETSc) +endif () +if (WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT) + target_compile_features(opgen-supg_diffusion PUBLIC cxx_std_23) +else () + target_compile_features(opgen-supg_diffusion PUBLIC cxx_std_17) +endif () diff --git a/operators/supg_diffusion/P2ElementwiseSupgDiffusion.cpp b/operators/supg_diffusion/P2ElementwiseSupgDiffusion.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3672880492641734e893f9fad9586edbf827ca38 --- /dev/null +++ b/operators/supg_diffusion/P2ElementwiseSupgDiffusion.cpp @@ -0,0 +1,397 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. 
+* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + +// Unfortunately, the inverse diagonal kernel wrapper triggers a GCC bug (maybe +// (related to) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107087) causing a +// warning in an internal standard library header (bits/stl_algobase.h). As a +// workaround, we disable the warning and include this header indirectly through +// a public header. +#include <waLBerlaDefinitions.h> +#ifdef WALBERLA_CXX_COMPILER_IS_GNU +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnonnull" +#endif +#include <cmath> +#ifdef WALBERLA_CXX_COMPILER_IS_GNU +#pragma GCC diagnostic pop +#endif + +#include "P2ElementwiseSupgDiffusion.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +P2ElementwiseSupgDiffusion::P2ElementwiseSupgDiffusion( const std::shared_ptr< PrimitiveStorage >& storage, + size_t minLevel, + size_t maxLevel, + const P2Function< real_t >& _diffusivity_times_delta, + const P2Function< real_t >& _wx, + const P2Function< real_t >& _wy ) +: Operator( storage, minLevel, maxLevel ) +, diffusivity_times_delta( _diffusivity_times_delta ) +, wx( _wx ) +, wy( _wy ) +{} + +void P2ElementwiseSupgDiffusion::apply( const P2Function< real_t >& src, + const P2Function< real_t >& dst, + uint_t level, + DoFType flag, + UpdateType updateType ) const +{ + this->startTiming( "apply" ); + + // Make sure that halos are up-to-date + this->timingTree_->start( "pre-communication" ); + if ( 
this->storage_->hasGlobalCells() ) + { + WALBERLA_ABORT( "Not implemented." ); + } + else + { + communication::syncFunctionBetweenPrimitives( src, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( diffusivity_times_delta, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( wx, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( wy, level, communication::syncDirection_t::LOW2HIGH ); + } + this->timingTree_->stop( "pre-communication" ); + + if ( updateType == Replace ) + { + // We need to zero the destination array (including halos). + // However, we must not zero out anything that is not flagged with the specified BCs. + // Therefore, we first zero out everything that flagged, and then, later, + // the halos of the highest dim primitives. + dst.interpolate( walberla::numeric_cast< real_t >( 0 ), level, flag ); + } + + if ( storage_->hasGlobalCells() ) + { + WALBERLA_ABORT( "Not implemented." 
); + } + else + { + for ( auto& it : storage_->getFaces() ) + { + Face& face = *it.second; + + // get hold of the actual numerical data in the functions + real_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_srcEdge = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_dstEdge = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_diffusivity_times_deltaVertex = + face.getData( diffusivity_times_delta.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_diffusivity_times_deltaEdge = + face.getData( diffusivity_times_delta.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_wxVertex = face.getData( wx.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_wxEdge = face.getData( wx.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_wyVertex = face.getData( wy.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_wyEdge = face.getData( wy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + + // Zero out dst halos only + // + // This is also necessary when using update type == Add. + // During additive comm we then skip zeroing the data on the lower-dim primitives. 
+ for ( const auto& idx : vertexdof::macroface::Iterator( level ) ) + { + if ( vertexdof::macroface::isVertexOnBoundary( level, idx ) ) + { + auto arrayIdx = vertexdof::macroface::index( level, idx.x(), idx.y() ); + _data_dstVertex[arrayIdx] = walberla::numeric_cast< real_t >( 0 ); + } + } + for ( const auto& idx : edgedof::macroface::Iterator( level ) ) + { + for ( const auto& orientation : edgedof::faceLocalEdgeDoFOrientations ) + { + if ( !edgedof::macroface::isInnerEdgeDoF( level, idx, orientation ) ) + { + auto arrayIdx = edgedof::macroface::index( level, idx.x(), idx.y(), orientation ); + _data_dstEdge[arrayIdx] = walberla::numeric_cast< real_t >( 0 ); + } + } + } + + const auto micro_edges_per_macro_edge = (int64_t) levelinfo::num_microedges_per_edge( level ); + const auto micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level ); + const real_t macro_vertex_coord_id_0comp0 = (real_t) face.getCoordinates()[0][0]; + const real_t macro_vertex_coord_id_0comp1 = (real_t) face.getCoordinates()[0][1]; + const real_t macro_vertex_coord_id_1comp0 = (real_t) face.getCoordinates()[1][0]; + const real_t macro_vertex_coord_id_1comp1 = (real_t) face.getCoordinates()[1][1]; + const real_t macro_vertex_coord_id_2comp0 = (real_t) face.getCoordinates()[2][0]; + const real_t macro_vertex_coord_id_2comp1 = (real_t) face.getCoordinates()[2][1]; + + this->timingTree_->start( "kernel" ); + + apply_P2ElementwiseSupgDiffusion_macro_2D( + + _data_diffusivity_times_deltaEdge, + _data_diffusivity_times_deltaVertex, + _data_dstEdge, + _data_dstVertex, + _data_srcEdge, + _data_srcVertex, + _data_wxEdge, + _data_wxVertex, + _data_wyEdge, + _data_wyVertex, + macro_vertex_coord_id_0comp0, + macro_vertex_coord_id_0comp1, + macro_vertex_coord_id_1comp0, + macro_vertex_coord_id_1comp1, + macro_vertex_coord_id_2comp0, + macro_vertex_coord_id_2comp1, + micro_edges_per_macro_edge, + micro_edges_per_macro_edge_float ); + + this->timingTree_->stop( "kernel" ); + } + + 
// Push result to lower-dimensional primitives + // + this->timingTree_->start( "post-communication" ); + // Note: We could avoid communication here by implementing the apply() also for the respective + // lower dimensional primitives! + dst.getVertexDoFFunction().communicateAdditively< Face, Edge >( + level, DoFType::All ^ flag, *storage_, updateType == Replace ); + dst.getVertexDoFFunction().communicateAdditively< Face, Vertex >( + level, DoFType::All ^ flag, *storage_, updateType == Replace ); + dst.getEdgeDoFFunction().communicateAdditively< Face, Edge >( + level, DoFType::All ^ flag, *storage_, updateType == Replace ); + this->timingTree_->stop( "post-communication" ); + } + + this->stopTiming( "apply" ); +} +void P2ElementwiseSupgDiffusion::toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat, + const P2Function< idx_t >& src, + const P2Function< idx_t >& dst, + uint_t level, + DoFType flag ) const +{ + this->startTiming( "toMatrix" ); + + // We currently ignore the flag provided! + if ( flag != All ) + { + WALBERLA_LOG_WARNING_ON_ROOT( "Input flag ignored in toMatrix; using flag = All" ); + } + + if ( storage_->hasGlobalCells() ) + { + this->timingTree_->start( "pre-communication" ); + diffusivity_times_delta.communicate< Face, Cell >( level ); + diffusivity_times_delta.communicate< Edge, Cell >( level ); + diffusivity_times_delta.communicate< Vertex, Cell >( level ); + wx.communicate< Face, Cell >( level ); + wx.communicate< Edge, Cell >( level ); + wx.communicate< Vertex, Cell >( level ); + wy.communicate< Face, Cell >( level ); + wy.communicate< Edge, Cell >( level ); + wy.communicate< Vertex, Cell >( level ); + this->timingTree_->stop( "pre-communication" ); + + WALBERLA_ABORT( "Not implemented." 
); + } + else + { + this->timingTree_->start( "pre-communication" ); + communication::syncFunctionBetweenPrimitives( diffusivity_times_delta, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( wx, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( wy, level, communication::syncDirection_t::LOW2HIGH ); + this->timingTree_->stop( "pre-communication" ); + + for ( auto& it : storage_->getFaces() ) + { + Face& face = *it.second; + + // get hold of the actual numerical data + idx_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + idx_t* _data_srcEdge = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + idx_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + idx_t* _data_dstEdge = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_diffusivity_times_deltaVertex = + face.getData( diffusivity_times_delta.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_diffusivity_times_deltaEdge = + face.getData( diffusivity_times_delta.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_wxVertex = face.getData( wx.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_wxEdge = face.getData( wx.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_wyVertex = face.getData( wy.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_wyEdge = face.getData( wy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + + const auto micro_edges_per_macro_edge = (int64_t) levelinfo::num_microedges_per_edge( level ); + const auto micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level ); + const real_t macro_vertex_coord_id_0comp0 = (real_t) face.getCoordinates()[0][0]; + 
const real_t macro_vertex_coord_id_0comp1 = (real_t) face.getCoordinates()[0][1]; + const real_t macro_vertex_coord_id_1comp0 = (real_t) face.getCoordinates()[1][0]; + const real_t macro_vertex_coord_id_1comp1 = (real_t) face.getCoordinates()[1][1]; + const real_t macro_vertex_coord_id_2comp0 = (real_t) face.getCoordinates()[2][0]; + const real_t macro_vertex_coord_id_2comp1 = (real_t) face.getCoordinates()[2][1]; + + this->timingTree_->start( "kernel" ); + + toMatrix_P2ElementwiseSupgDiffusion_macro_2D( + + _data_diffusivity_times_deltaEdge, + _data_diffusivity_times_deltaVertex, + _data_dstEdge, + _data_dstVertex, + _data_srcEdge, + _data_srcVertex, + _data_wxEdge, + _data_wxVertex, + _data_wyEdge, + _data_wyVertex, + macro_vertex_coord_id_0comp0, + macro_vertex_coord_id_0comp1, + macro_vertex_coord_id_1comp0, + macro_vertex_coord_id_1comp1, + macro_vertex_coord_id_2comp0, + macro_vertex_coord_id_2comp1, + mat, + micro_edges_per_macro_edge, + micro_edges_per_macro_edge_float ); + + this->timingTree_->stop( "kernel" ); + } + } + this->stopTiming( "toMatrix" ); +} +void P2ElementwiseSupgDiffusion::computeInverseDiagonalOperatorValues() +{ + this->startTiming( "computeInverseDiagonalOperatorValues" ); + + if ( invDiag_ == nullptr ) + { + invDiag_ = std::make_shared< P2Function< real_t > >( "inverse diagonal entries", storage_, minLevel_, maxLevel_ ); + } + + for ( uint_t level = minLevel_; level <= maxLevel_; level++ ) + { + invDiag_->setToZero( level ); + + if ( storage_->hasGlobalCells() ) + { + this->timingTree_->start( "pre-communication" ); + diffusivity_times_delta.communicate< Face, Cell >( level ); + diffusivity_times_delta.communicate< Edge, Cell >( level ); + diffusivity_times_delta.communicate< Vertex, Cell >( level ); + wx.communicate< Face, Cell >( level ); + wx.communicate< Edge, Cell >( level ); + wx.communicate< Vertex, Cell >( level ); + wy.communicate< Face, Cell >( level ); + wy.communicate< Edge, Cell >( level ); + wy.communicate< Vertex, Cell >( 
level ); + this->timingTree_->stop( "pre-communication" ); + + WALBERLA_ABORT( "Not implemented." ); + ( *invDiag_ ).invertElementwise( level ); + } + else + { + this->timingTree_->start( "pre-communication" ); + communication::syncFunctionBetweenPrimitives( diffusivity_times_delta, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( wx, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( wy, level, communication::syncDirection_t::LOW2HIGH ); + this->timingTree_->stop( "pre-communication" ); + + for ( auto& it : storage_->getFaces() ) + { + Face& face = *it.second; + + // get hold of the actual numerical data + real_t* _data_invDiag_Vertex = + face.getData( ( *invDiag_ ).getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_invDiag_Edge = face.getData( ( *invDiag_ ).getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_diffusivity_times_deltaVertex = + face.getData( diffusivity_times_delta.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_diffusivity_times_deltaEdge = + face.getData( diffusivity_times_delta.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_wxVertex = face.getData( wx.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_wxEdge = face.getData( wx.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_wyVertex = face.getData( wy.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_wyEdge = face.getData( wy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + + const auto micro_edges_per_macro_edge = (int64_t) levelinfo::num_microedges_per_edge( level ); + const auto micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level ); + const real_t macro_vertex_coord_id_0comp0 = (real_t) face.getCoordinates()[0][0]; + const real_t 
macro_vertex_coord_id_0comp1 = (real_t) face.getCoordinates()[0][1]; + const real_t macro_vertex_coord_id_1comp0 = (real_t) face.getCoordinates()[1][0]; + const real_t macro_vertex_coord_id_1comp1 = (real_t) face.getCoordinates()[1][1]; + const real_t macro_vertex_coord_id_2comp0 = (real_t) face.getCoordinates()[2][0]; + const real_t macro_vertex_coord_id_2comp1 = (real_t) face.getCoordinates()[2][1]; + + this->timingTree_->start( "kernel" ); + + computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusion_macro_2D( + + _data_diffusivity_times_deltaEdge, + _data_diffusivity_times_deltaVertex, + _data_invDiag_Edge, + _data_invDiag_Vertex, + _data_wxEdge, + _data_wxVertex, + _data_wyEdge, + _data_wyVertex, + macro_vertex_coord_id_0comp0, + macro_vertex_coord_id_0comp1, + macro_vertex_coord_id_1comp0, + macro_vertex_coord_id_1comp1, + macro_vertex_coord_id_2comp0, + macro_vertex_coord_id_2comp1, + micro_edges_per_macro_edge, + micro_edges_per_macro_edge_float ); + + this->timingTree_->stop( "kernel" ); + } + + // Push result to lower-dimensional primitives + // + this->timingTree_->start( "post-communication" ); + // Note: We could avoid communication here by implementing the apply() also for the respective + // lower dimensional primitives! 
+ ( *invDiag_ ).getVertexDoFFunction().communicateAdditively< Face, Edge >( level ); + ( *invDiag_ ).getVertexDoFFunction().communicateAdditively< Face, Vertex >( level ); + ( *invDiag_ ).getEdgeDoFFunction().communicateAdditively< Face, Edge >( level ); + this->timingTree_->stop( "post-communication" ); + ( *invDiag_ ).invertElementwise( level ); + } + } + + this->stopTiming( "computeInverseDiagonalOperatorValues" ); +} +std::shared_ptr< P2Function< real_t > > P2ElementwiseSupgDiffusion::getInverseDiagonalValues() const +{ + return invDiag_; +} + +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/supg_diffusion/P2ElementwiseSupgDiffusion.hpp b/operators/supg_diffusion/P2ElementwiseSupgDiffusion.hpp new file mode 100644 index 0000000000000000000000000000000000000000..fd879661a7fd3317aed0dd48293b7af859dfc3a8 --- /dev/null +++ b/operators/supg_diffusion/P2ElementwiseSupgDiffusion.hpp @@ -0,0 +1,172 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. 
+*/ + +#pragma once + +#include "core/DataTypes.h" + +#include "hyteg/LikwidWrapper.hpp" +#include "hyteg/boundary/BoundaryConditions.hpp" +#include "hyteg/communication/Syncing.hpp" +#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp" +#include "hyteg/operators/Operator.hpp" +#include "hyteg/p2functionspace/P2Function.hpp" +#include "hyteg/primitivestorage/PrimitiveStorage.hpp" +#include "hyteg/solvers/Smoothables.hpp" +#include "hyteg/sparseassembly/SparseMatrixProxy.hpp" +#include "hyteg/types/types.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +class P2ElementwiseSupgDiffusion : public Operator< P2Function< real_t >, P2Function< real_t > >, + public OperatorWithInverseDiagonal< P2Function< real_t > > +{ + public: + P2ElementwiseSupgDiffusion( const std::shared_ptr< PrimitiveStorage >& storage, + size_t minLevel, + size_t maxLevel, + const P2Function< real_t >& _diffusivity_times_delta, + const P2Function< real_t >& _wx, + const P2Function< real_t >& _wy ); + + void apply( const P2Function< real_t >& src, + const P2Function< real_t >& dst, + uint_t level, + DoFType flag, + UpdateType updateType = Replace ) const; + + void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat, + const P2Function< idx_t >& src, + const P2Function< idx_t >& dst, + uint_t level, + DoFType flag ) const; + + void computeInverseDiagonalOperatorValues(); + + std::shared_ptr< P2Function< real_t > > getInverseDiagonalValues() const; + + protected: + private: + /// Integral: P2ElementwiseSupgDiffusion + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: apply + /// - loop strategy: SAWTOOTH + /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: IdentityMap + /// - operations per element: + /// adds muls divs pows abs assignments function_calls unknown_ops + /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- + /// 472 584 12 0 0 0 0 1 + void 
apply_P2ElementwiseSupgDiffusion_macro_2D( real_t* RESTRICT _data_diffusivity_times_deltaEdge, + real_t* RESTRICT _data_diffusivity_times_deltaVertex, + real_t* RESTRICT _data_dstEdge, + real_t* RESTRICT _data_dstVertex, + real_t* RESTRICT _data_srcEdge, + real_t* RESTRICT _data_srcVertex, + real_t* RESTRICT _data_wxEdge, + real_t* RESTRICT _data_wxVertex, + real_t* RESTRICT _data_wyEdge, + real_t* RESTRICT _data_wyVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseSupgDiffusion + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: toMatrix + /// - loop strategy: SAWTOOTH + /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: IdentityMap + /// - operations per element: + /// adds muls divs pows abs assignments function_calls unknown_ops + /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- + /// 436 548 12 0 0 0 0 4 + void toMatrix_P2ElementwiseSupgDiffusion_macro_2D( real_t* RESTRICT _data_diffusivity_times_deltaEdge, + real_t* RESTRICT _data_diffusivity_times_deltaVertex, + idx_t* RESTRICT _data_dstEdge, + idx_t* RESTRICT _data_dstVertex, + idx_t* RESTRICT _data_srcEdge, + idx_t* RESTRICT _data_srcVertex, + real_t* RESTRICT _data_wxEdge, + real_t* RESTRICT _data_wxVertex, + real_t* RESTRICT _data_wyEdge, + real_t* RESTRICT _data_wyVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + std::shared_ptr< SparseMatrixProxy > mat, + int64_t micro_edges_per_macro_edge, + real_t 
micro_edges_per_macro_edge_float ) const; + + /// Integral: P2ElementwiseSupgDiffusion + /// - volume element: triangle, dim: 2, vertices: 3, spacedim: 2 + /// - kernel type: computeInverseDiagonalOperatorValues + /// - loop strategy: SAWTOOTH + /// - quadrature rule: Dunavant 3 | points: 4, degree: 3 + /// - blending map: IdentityMap + /// - operations per element: + /// adds muls divs pows abs assignments function_calls unknown_ops + /// ------ ------ ------ ------ ----- ------------- ---------------- ------------- + /// 322 428 12 0 0 0 0 1 + void computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusion_macro_2D( + real_t* RESTRICT _data_diffusivity_times_deltaEdge, + real_t* RESTRICT _data_diffusivity_times_deltaVertex, + real_t* RESTRICT _data_invDiag_Edge, + real_t* RESTRICT _data_invDiag_Vertex, + real_t* RESTRICT _data_wxEdge, + real_t* RESTRICT _data_wxVertex, + real_t* RESTRICT _data_wyEdge, + real_t* RESTRICT _data_wyVertex, + real_t macro_vertex_coord_id_0comp0, + real_t macro_vertex_coord_id_0comp1, + real_t macro_vertex_coord_id_1comp0, + real_t macro_vertex_coord_id_1comp1, + real_t macro_vertex_coord_id_2comp0, + real_t macro_vertex_coord_id_2comp1, + int64_t micro_edges_per_macro_edge, + real_t micro_edges_per_macro_edge_float ) const; + + std::shared_ptr< P2Function< real_t > > invDiag_; + P2Function< real_t > diffusivity_times_delta; + P2Function< real_t > wx; + P2Function< real_t > wy; +}; + +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/supg_diffusion/P2ElementwiseSupgDiffusionAnnulusMap.cpp b/operators/supg_diffusion/P2ElementwiseSupgDiffusionAnnulusMap.cpp new file mode 100644 index 0000000000000000000000000000000000000000..da784554b119666cbeaea7787448492c560e15e1 --- /dev/null +++ b/operators/supg_diffusion/P2ElementwiseSupgDiffusionAnnulusMap.cpp @@ -0,0 +1,454 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. 
+* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + +// Unfortunately, the inverse diagonal kernel wrapper triggers a GCC bug (maybe +// (related to) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107087) causing a +// warning in an internal standard library header (bits/stl_algobase.h). As a +// workaround, we disable the warning and include this header indirectly through +// a public header. 
+#include <waLBerlaDefinitions.h> +#ifdef WALBERLA_CXX_COMPILER_IS_GNU +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnonnull" +#endif +#include <cmath> +#ifdef WALBERLA_CXX_COMPILER_IS_GNU +#pragma GCC diagnostic pop +#endif + +#include "P2ElementwiseSupgDiffusionAnnulusMap.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +P2ElementwiseSupgDiffusionAnnulusMap::P2ElementwiseSupgDiffusionAnnulusMap( const std::shared_ptr< PrimitiveStorage >& storage, + size_t minLevel, + size_t maxLevel, + const P2Function< real_t >& _diffusivity_times_delta, + const P2Function< real_t >& _wx, + const P2Function< real_t >& _wy ) +: Operator( storage, minLevel, maxLevel ) +, diffusivity_times_delta( _diffusivity_times_delta ) +, wx( _wx ) +, wy( _wy ) +{} + +void P2ElementwiseSupgDiffusionAnnulusMap::apply( const P2Function< real_t >& src, + const P2Function< real_t >& dst, + uint_t level, + DoFType flag, + UpdateType updateType ) const +{ + this->startTiming( "apply" ); + + // Make sure that halos are up-to-date + this->timingTree_->start( "pre-communication" ); + if ( this->storage_->hasGlobalCells() ) + { + WALBERLA_ABORT( "Not implemented." ); + } + else + { + communication::syncFunctionBetweenPrimitives( src, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( diffusivity_times_delta, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( wx, level, communication::syncDirection_t::LOW2HIGH ); + communication::syncFunctionBetweenPrimitives( wy, level, communication::syncDirection_t::LOW2HIGH ); + } + this->timingTree_->stop( "pre-communication" ); + + if ( updateType == Replace ) + { + // We need to zero the destination array (including halos). + // However, we must not zero out anything that is not flagged with the specified BCs. 
+ // Therefore, we first zero out everything that flagged, and then, later, + // the halos of the highest dim primitives. + dst.interpolate( walberla::numeric_cast< real_t >( 0 ), level, flag ); + } + + if ( storage_->hasGlobalCells() ) + { + WALBERLA_ABORT( "Not implemented." ); + } + else + { + for ( auto& it : storage_->getFaces() ) + { + Face& face = *it.second; + + // get hold of the actual numerical data in the functions + real_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_srcEdge = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_dstEdge = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_diffusivity_times_deltaVertex = + face.getData( diffusivity_times_delta.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_diffusivity_times_deltaEdge = + face.getData( diffusivity_times_delta.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_wxVertex = face.getData( wx.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_wxEdge = face.getData( wx.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_wyVertex = face.getData( wy.getVertexDoFFunction().getFaceDataID() )->getPointer( level ); + real_t* _data_wyEdge = face.getData( wy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level ); + + // Zero out dst halos only + // + // This is also necessary when using update type == Add. + // During additive comm we then skip zeroing the data on the lower-dim primitives. 
+ for ( const auto& idx : vertexdof::macroface::Iterator( level ) ) + { + if ( vertexdof::macroface::isVertexOnBoundary( level, idx ) ) + { + auto arrayIdx = vertexdof::macroface::index( level, idx.x(), idx.y() ); + _data_dstVertex[arrayIdx] = walberla::numeric_cast< real_t >( 0 ); + } + } + for ( const auto& idx : edgedof::macroface::Iterator( level ) ) + { + for ( const auto& orientation : edgedof::faceLocalEdgeDoFOrientations ) + { + if ( !edgedof::macroface::isInnerEdgeDoF( level, idx, orientation ) ) + { + auto arrayIdx = edgedof::macroface::index( level, idx.x(), idx.y(), orientation ); + _data_dstEdge[arrayIdx] = walberla::numeric_cast< real_t >( 0 ); + } + } + } + + const auto micro_edges_per_macro_edge = (int64_t) levelinfo::num_microedges_per_edge( level ); + const auto micro_edges_per_macro_edge_float = (real_t) levelinfo::num_microedges_per_edge( level ); + const real_t macro_vertex_coord_id_0comp0 = (real_t) face.getCoordinates()[0][0]; + const real_t macro_vertex_coord_id_0comp1 = (real_t) face.getCoordinates()[0][1]; + const real_t macro_vertex_coord_id_1comp0 = (real_t) face.getCoordinates()[1][0]; + const real_t macro_vertex_coord_id_1comp1 = (real_t) face.getCoordinates()[1][1]; + const real_t macro_vertex_coord_id_2comp0 = (real_t) face.getCoordinates()[2][0]; + const real_t macro_vertex_coord_id_2comp1 = (real_t) face.getCoordinates()[2][1]; + WALBERLA_CHECK_NOT_NULLPTR( + std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() ), + "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." 
) + real_t radRefVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRefVertex(); + real_t radRayVertex = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->radRayVertex(); + real_t refVertex_0 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[0]; + real_t rayVertex_0 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[0]; + real_t thrVertex_0 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[0]; + real_t refVertex_1 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->refVertex()[1]; + real_t rayVertex_1 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->rayVertex()[1]; + real_t thrVertex_1 = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() )->thrVertex()[1]; + + this->timingTree_->start( "kernel" ); + + apply_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D( + + _data_diffusivity_times_deltaEdge, + _data_diffusivity_times_deltaVertex, + _data_dstEdge, + _data_dstVertex, + _data_srcEdge, + _data_srcVertex, + _data_wxEdge, + _data_wxVertex, + _data_wyEdge, + _data_wyVertex, + macro_vertex_coord_id_0comp0, + macro_vertex_coord_id_0comp1, + macro_vertex_coord_id_1comp0, + macro_vertex_coord_id_1comp1, + macro_vertex_coord_id_2comp0, + macro_vertex_coord_id_2comp1, + micro_edges_per_macro_edge, + micro_edges_per_macro_edge_float, + radRayVertex, + radRefVertex, + rayVertex_0, + rayVertex_1, + refVertex_0, + refVertex_1, + thrVertex_0, + thrVertex_1 ); + + this->timingTree_->stop( "kernel" ); + } + + // Push result to lower-dimensional primitives + // + this->timingTree_->start( "post-communication" ); + // Note: We could avoid communication here by implementing the apply() also for the respective + // lower dimensional primitives! 
+      dst.getVertexDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getVertexDoFFunction().communicateAdditively< Face, Vertex >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      dst.getEdgeDoFFunction().communicateAdditively< Face, Edge >(
+          level, DoFType::All ^ flag, *storage_, updateType == Replace );
+      this->timingTree_->stop( "post-communication" );
+   }
+
+   this->stopTiming( "apply" );
+}
+
+/// Assembles the operator into the sparse matrix proxy \p mat.
+///
+/// \param mat   proxy that is handed to the macro-face kernel
+/// \param src   enumerator function whose face data is passed to the kernel as source indices
+/// \param dst   enumerator function whose face data is passed to the kernel as destination indices
+/// \param level refinement level on which the data pointers are fetched
+/// \param flag  ignored; a warning is logged on root if it differs from All
+///
+/// Aborts with "Not implemented." if the storage has global cells. Otherwise the
+/// coefficient functions are synced LOW2HIGH and the kernel is run once per macro-face.
+void P2ElementwiseSupgDiffusionAnnulusMap::toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                                                     const P2Function< idx_t >&                  src,
+                                                     const P2Function< idx_t >&                  dst,
+                                                     uint_t                                      level,
+                                                     DoFType                                     flag ) const
+{
+   this->startTiming( "toMatrix" );
+
+   // We currently ignore the flag provided!
+   if ( flag != All )
+   {
+      WALBERLA_LOG_WARNING_ON_ROOT( "Input flag ignored in toMatrix; using flag = All" );
+   }
+
+   if ( storage_->hasGlobalCells() )
+   {
+      this->timingTree_->start( "pre-communication" );
+      diffusivity_times_delta.communicate< Face, Cell >( level );
+      diffusivity_times_delta.communicate< Edge, Cell >( level );
+      diffusivity_times_delta.communicate< Vertex, Cell >( level );
+      wx.communicate< Face, Cell >( level );
+      wx.communicate< Edge, Cell >( level );
+      wx.communicate< Vertex, Cell >( level );
+      wy.communicate< Face, Cell >( level );
+      wy.communicate< Edge, Cell >( level );
+      wy.communicate< Vertex, Cell >( level );
+      this->timingTree_->stop( "pre-communication" );
+
+      WALBERLA_ABORT( "Not implemented." );
+   }
+   else
+   {
+      this->timingTree_->start( "pre-communication" );
+      communication::syncFunctionBetweenPrimitives( diffusivity_times_delta, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( wx, level, communication::syncDirection_t::LOW2HIGH );
+      communication::syncFunctionBetweenPrimitives( wy, level, communication::syncDirection_t::LOW2HIGH );
+      this->timingTree_->stop( "pre-communication" );
+
+      for ( auto& it : storage_->getFaces() )
+      {
+         Face& face = *it.second;
+
+         // get hold of the actual numerical data
+         idx_t* _data_srcVertex = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t* _data_srcEdge   = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t* _data_dstVertex = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         idx_t* _data_dstEdge   = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_diffusivity_times_deltaVertex =
+             face.getData( diffusivity_times_delta.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_diffusivity_times_deltaEdge =
+             face.getData( diffusivity_times_delta.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_wxVertex = face.getData( wx.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_wxEdge   = face.getData( wx.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_wyVertex = face.getData( wy.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+         real_t* _data_wyEdge   = face.getData( wy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+         const auto micro_edges_per_macro_edge       = static_cast< int64_t >( levelinfo::num_microedges_per_edge( level ) );
+         const auto micro_edges_per_macro_edge_float = static_cast< real_t >( levelinfo::num_microedges_per_edge( level ) );
+
+         // Coordinates of the three macro-face vertices, fetched once per face.
+         const auto&  coords                       = face.getCoordinates();
+         const real_t macro_vertex_coord_id_0comp0 = static_cast< real_t >( coords[0][0] );
+         const real_t macro_vertex_coord_id_0comp1 = static_cast< real_t >( coords[0][1] );
+         const real_t macro_vertex_coord_id_1comp0 = static_cast< real_t >( coords[1][0] );
+         const real_t macro_vertex_coord_id_1comp1 = static_cast< real_t >( coords[1][1] );
+         const real_t macro_vertex_coord_id_2comp0 = static_cast< real_t >( coords[2][0] );
+         const real_t macro_vertex_coord_id_2comp1 = static_cast< real_t >( coords[2][1] );
+
+         // Downcast the geometry map once and reuse it for the check and all accessors.
+         const auto annulusMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() );
+         WALBERLA_CHECK_NOT_NULLPTR(
+             annulusMap,
+             "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+         const real_t radRefVertex = annulusMap->radRefVertex();
+         const real_t radRayVertex = annulusMap->radRayVertex();
+         const real_t refVertex_0  = annulusMap->refVertex()[0];
+         const real_t rayVertex_0  = annulusMap->rayVertex()[0];
+         const real_t thrVertex_0  = annulusMap->thrVertex()[0];
+         const real_t refVertex_1  = annulusMap->refVertex()[1];
+         const real_t rayVertex_1  = annulusMap->rayVertex()[1];
+         const real_t thrVertex_1  = annulusMap->thrVertex()[1];
+
+         this->timingTree_->start( "kernel" );
+
+         toMatrix_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D( _data_diffusivity_times_deltaEdge,
+                                                                 _data_diffusivity_times_deltaVertex,
+                                                                 _data_dstEdge,
+                                                                 _data_dstVertex,
+                                                                 _data_srcEdge,
+                                                                 _data_srcVertex,
+                                                                 _data_wxEdge,
+                                                                 _data_wxVertex,
+                                                                 _data_wyEdge,
+                                                                 _data_wyVertex,
+                                                                 macro_vertex_coord_id_0comp0,
+                                                                 macro_vertex_coord_id_0comp1,
+                                                                 macro_vertex_coord_id_1comp0,
+                                                                 macro_vertex_coord_id_1comp1,
+                                                                 macro_vertex_coord_id_2comp0,
+                                                                 macro_vertex_coord_id_2comp1,
+                                                                 mat,
+                                                                 micro_edges_per_macro_edge,
+                                                                 micro_edges_per_macro_edge_float,
+                                                                 radRayVertex,
+                                                                 radRefVertex,
+                                                                 rayVertex_0,
+                                                                 rayVertex_1,
+                                                                 refVertex_0,
+                                                                 refVertex_1,
+                                                                 thrVertex_0,
+                                                                 thrVertex_1 );
+
+         this->timingTree_->stop( "kernel" );
+      }
+   }
+   this->stopTiming( "toMatrix" );
+}
+
+/// Computes the inverse of the operator diagonal on every level in [minLevel_, maxLevel_].
+///
+/// Allocates invDiag_ on first use. Per level: zeroes invDiag_, runs the macro-face
+/// kernel on every face, additively communicates the result to edges and vertices, and
+/// finally inverts the accumulated values elementwise.
+/// Aborts with "Not implemented." if the storage has global cells.
+void P2ElementwiseSupgDiffusionAnnulusMap::computeInverseDiagonalOperatorValues()
+{
+   this->startTiming( "computeInverseDiagonalOperatorValues" );
+
+   if ( invDiag_ == nullptr )
+   {
+      invDiag_ = std::make_shared< P2Function< real_t > >( "inverse diagonal entries", storage_, minLevel_, maxLevel_ );
+   }
+
+   for ( uint_t level = minLevel_; level <= maxLevel_; level++ )
+   {
+      invDiag_->setToZero( level );
+
+      if ( storage_->hasGlobalCells() )
+      {
+         this->timingTree_->start( "pre-communication" );
+         diffusivity_times_delta.communicate< Face, Cell >( level );
+         diffusivity_times_delta.communicate< Edge, Cell >( level );
+         diffusivity_times_delta.communicate< Vertex, Cell >( level );
+         wx.communicate< Face, Cell >( level );
+         wx.communicate< Edge, Cell >( level );
+         wx.communicate< Vertex, Cell >( level );
+         wy.communicate< Face, Cell >( level );
+         wy.communicate< Edge, Cell >( level );
+         wy.communicate< Vertex, Cell >( level );
+         this->timingTree_->stop( "pre-communication" );
+
+         WALBERLA_ABORT( "Not implemented." );
+         // NOTE(review): presumably unreachable if WALBERLA_ABORT does not return; kept as generated.
+         invDiag_->invertElementwise( level );
+      }
+      else
+      {
+         this->timingTree_->start( "pre-communication" );
+         communication::syncFunctionBetweenPrimitives( diffusivity_times_delta, level, communication::syncDirection_t::LOW2HIGH );
+         communication::syncFunctionBetweenPrimitives( wx, level, communication::syncDirection_t::LOW2HIGH );
+         communication::syncFunctionBetweenPrimitives( wy, level, communication::syncDirection_t::LOW2HIGH );
+         this->timingTree_->stop( "pre-communication" );
+
+         for ( auto& it : storage_->getFaces() )
+         {
+            Face& face = *it.second;
+
+            // get hold of the actual numerical data
+            real_t* _data_invDiag_Vertex = face.getData( invDiag_->getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_invDiag_Edge   = face.getData( invDiag_->getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_diffusivity_times_deltaVertex =
+                face.getData( diffusivity_times_delta.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_diffusivity_times_deltaEdge =
+                face.getData( diffusivity_times_delta.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_wxVertex = face.getData( wx.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_wxEdge   = face.getData( wx.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_wyVertex = face.getData( wy.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
+            real_t* _data_wyEdge   = face.getData( wy.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
+
+            const auto micro_edges_per_macro_edge       = static_cast< int64_t >( levelinfo::num_microedges_per_edge( level ) );
+            const auto micro_edges_per_macro_edge_float = static_cast< real_t >( levelinfo::num_microedges_per_edge( level ) );
+
+            // Coordinates of the three macro-face vertices, fetched once per face.
+            const auto&  coords                       = face.getCoordinates();
+            const real_t macro_vertex_coord_id_0comp0 = static_cast< real_t >( coords[0][0] );
+            const real_t macro_vertex_coord_id_0comp1 = static_cast< real_t >( coords[0][1] );
+            const real_t macro_vertex_coord_id_1comp0 = static_cast< real_t >( coords[1][0] );
+            const real_t macro_vertex_coord_id_1comp1 = static_cast< real_t >( coords[1][1] );
+            const real_t macro_vertex_coord_id_2comp0 = static_cast< real_t >( coords[2][0] );
+            const real_t macro_vertex_coord_id_2comp1 = static_cast< real_t >( coords[2][1] );
+
+            // Downcast the geometry map once and reuse it for the check and all accessors.
+            const auto annulusMap = std::dynamic_pointer_cast< AnnulusMap >( face.getGeometryMap() );
+            WALBERLA_CHECK_NOT_NULLPTR(
+                annulusMap,
+                "This operator requires the AnnulusMap to be registered as GeometryMap on every macro-cell." )
+            const real_t radRefVertex = annulusMap->radRefVertex();
+            const real_t radRayVertex = annulusMap->radRayVertex();
+            const real_t refVertex_0  = annulusMap->refVertex()[0];
+            const real_t rayVertex_0  = annulusMap->rayVertex()[0];
+            const real_t thrVertex_0  = annulusMap->thrVertex()[0];
+            const real_t refVertex_1  = annulusMap->refVertex()[1];
+            const real_t rayVertex_1  = annulusMap->rayVertex()[1];
+            const real_t thrVertex_1  = annulusMap->thrVertex()[1];
+
+            this->timingTree_->start( "kernel" );
+
+            computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D(
+                _data_diffusivity_times_deltaEdge,
+                _data_diffusivity_times_deltaVertex,
+                _data_invDiag_Edge,
+                _data_invDiag_Vertex,
+                _data_wxEdge,
+                _data_wxVertex,
+                _data_wyEdge,
+                _data_wyVertex,
+                macro_vertex_coord_id_0comp0,
+                macro_vertex_coord_id_0comp1,
+                macro_vertex_coord_id_1comp0,
+                macro_vertex_coord_id_1comp1,
+                macro_vertex_coord_id_2comp0,
+                macro_vertex_coord_id_2comp1,
+                micro_edges_per_macro_edge,
+                micro_edges_per_macro_edge_float,
+                radRayVertex,
+                radRefVertex,
+                rayVertex_0,
+                rayVertex_1,
+                refVertex_0,
+                refVertex_1,
+                thrVertex_0,
+                thrVertex_1 );
+
+            this->timingTree_->stop( "kernel" );
+         }
+
+         // Push result to lower-dimensional primitives
+         //
+         this->timingTree_->start( "post-communication" );
+         // Note: We could avoid communication here by implementing the apply() also for the respective
+         //       lower dimensional primitives!
+         invDiag_->getVertexDoFFunction().communicateAdditively< Face, Edge >( level );
+         invDiag_->getVertexDoFFunction().communicateAdditively< Face, Vertex >( level );
+         invDiag_->getEdgeDoFFunction().communicateAdditively< Face, Edge >( level );
+         this->timingTree_->stop( "post-communication" );
+         invDiag_->invertElementwise( level );
+      }
+   }
+
+   this->stopTiming( "computeInverseDiagonalOperatorValues" );
+}
+
+/// Returns the stored inverse-diagonal function (the invDiag_ member, as is).
+std::shared_ptr< P2Function< real_t > > P2ElementwiseSupgDiffusionAnnulusMap::getInverseDiagonalValues() const
+{
+   return invDiag_;
+}
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_diffusion/P2ElementwiseSupgDiffusionAnnulusMap.hpp b/operators/supg_diffusion/P2ElementwiseSupgDiffusionAnnulusMap.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..62a4d379c3898759d16be34ead8b2a841e53528c
--- /dev/null
+++ b/operators/supg_diffusion/P2ElementwiseSupgDiffusionAnnulusMap.hpp
@@ -0,0 +1,197 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/
+
+#pragma once
+
+#include "core/DataTypes.h"
+
+#include "hyteg/LikwidWrapper.hpp"
+#include "hyteg/boundary/BoundaryConditions.hpp"
+#include "hyteg/communication/Syncing.hpp"
+#include "hyteg/edgedofspace/EdgeDoFMacroCell.hpp"
+#include "hyteg/geometry/AnnulusMap.hpp"
+#include "hyteg/operators/Operator.hpp"
+#include "hyteg/p2functionspace/P2Function.hpp"
+#include "hyteg/primitivestorage/PrimitiveStorage.hpp"
+#include "hyteg/solvers/Smoothables.hpp"
+#include "hyteg/sparseassembly/SparseMatrixProxy.hpp"
+#include "hyteg/types/types.hpp"
+
+#define FUNC_PREFIX
+
+namespace hyteg {
+
+namespace operatorgeneration {
+
+/// Generated elementwise operator mapping P2Function< real_t > to P2Function< real_t >.
+///
+/// The kernels declared below are for triangles (dim 2) with the AnnulusMap blending map.
+/// The class holds three P2 coefficient functions (diffusivity_times_delta, wx, wy) and an
+/// optional inverse-diagonal function (invDiag_).
+class P2ElementwiseSupgDiffusionAnnulusMap : public Operator< P2Function< real_t >, P2Function< real_t > >,
+                                             public OperatorWithInverseDiagonal< P2Function< real_t > >
+{
+ public:
+   /// \param storage                  primitive storage the operator works on
+   /// \param minLevel                 lowest refinement level
+   /// \param maxLevel                 highest refinement level
+   /// \param _diffusivity_times_delta coefficient function; presumably copied into the
+   ///                                 member of the same name (constructor body not in this header)
+   /// \param _wx                      coefficient function; presumably copied into member wx
+   /// \param _wy                      coefficient function; presumably copied into member wy
+   P2ElementwiseSupgDiffusionAnnulusMap( const std::shared_ptr< PrimitiveStorage >& storage,
+                                         size_t                                     minLevel,
+                                         size_t                                     maxLevel,
+                                         const P2Function< real_t >&                _diffusivity_times_delta,
+                                         const P2Function< real_t >&                _wx,
+                                         const P2Function< real_t >&                _wy );
+
+   /// Applies the operator to \p src and writes the result to \p dst on \p level.
+   ///
+   /// \param flag       DoF type selector
+   /// \param updateType Replace (default) or another UpdateType value
+   void apply( const P2Function< real_t >& src,
+               const P2Function< real_t >& dst,
+               uint_t                      level,
+               DoFType                     flag,
+               UpdateType                  updateType = Replace ) const;
+
+   /// Assembles the operator into the sparse matrix proxy \p mat.
+   ///
+   /// \param src  enumerator function supplying source indices
+   /// \param dst  enumerator function supplying destination indices
+   /// \param flag DoF type selector
+   void toMatrix( const std::shared_ptr< SparseMatrixProxy >& mat,
+                  const P2Function< idx_t >&                  src,
+                  const P2Function< idx_t >&                  dst,
+                  uint_t                                      level,
+                  DoFType                                     flag ) const;
+
+   /// Computes the inverse diagonal values of the operator; not const, so it may
+   /// modify members (presumably invDiag_ — definition is not in this header).
+   void computeInverseDiagonalOperatorValues();
+
+   /// Returns the shared pointer to the inverse-diagonal function.
+   std::shared_ptr< P2Function< real_t > > getInverseDiagonalValues() const;
+
+ protected:
+ private:
+   /// Integral: P2ElementwiseSupgDiffusionAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     apply
+   /// - loop strategy:   SAWTOOTH
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///   1140    1756      20      12      0              0                 0              1
+   void apply_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D( real_t* RESTRICT _data_diffusivity_times_deltaEdge,
+                                                             real_t* RESTRICT _data_diffusivity_times_deltaVertex,
+                                                             real_t* RESTRICT _data_dstEdge,
+                                                             real_t* RESTRICT _data_dstVertex,
+                                                             real_t* RESTRICT _data_srcEdge,
+                                                             real_t* RESTRICT _data_srcVertex,
+                                                             real_t* RESTRICT _data_wxEdge,
+                                                             real_t* RESTRICT _data_wxVertex,
+                                                             real_t* RESTRICT _data_wyEdge,
+                                                             real_t* RESTRICT _data_wyVertex,
+                                                             real_t           macro_vertex_coord_id_0comp0,
+                                                             real_t           macro_vertex_coord_id_0comp1,
+                                                             real_t           macro_vertex_coord_id_1comp0,
+                                                             real_t           macro_vertex_coord_id_1comp1,
+                                                             real_t           macro_vertex_coord_id_2comp0,
+                                                             real_t           macro_vertex_coord_id_2comp1,
+                                                             int64_t          micro_edges_per_macro_edge,
+                                                             real_t           micro_edges_per_macro_edge_float,
+                                                             real_t           radRayVertex,
+                                                             real_t           radRefVertex,
+                                                             real_t           rayVertex_0,
+                                                             real_t           rayVertex_1,
+                                                             real_t           refVertex_0,
+                                                             real_t           refVertex_1,
+                                                             real_t           thrVertex_0,
+                                                             real_t           thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseSupgDiffusionAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     toMatrix
+   /// - loop strategy:   SAWTOOTH
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///   1104    1720      20      12      0              0                 0              4
+   void toMatrix_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D( real_t* RESTRICT _data_diffusivity_times_deltaEdge,
+                                                                real_t* RESTRICT _data_diffusivity_times_deltaVertex,
+                                                                idx_t* RESTRICT  _data_dstEdge,
+                                                                idx_t* RESTRICT  _data_dstVertex,
+                                                                idx_t* RESTRICT  _data_srcEdge,
+                                                                idx_t* RESTRICT  _data_srcVertex,
+                                                                real_t* RESTRICT _data_wxEdge,
+                                                                real_t* RESTRICT _data_wxVertex,
+                                                                real_t* RESTRICT _data_wyEdge,
+                                                                real_t* RESTRICT _data_wyVertex,
+                                                                real_t           macro_vertex_coord_id_0comp0,
+                                                                real_t           macro_vertex_coord_id_0comp1,
+                                                                real_t           macro_vertex_coord_id_1comp0,
+                                                                real_t           macro_vertex_coord_id_1comp1,
+                                                                real_t           macro_vertex_coord_id_2comp0,
+                                                                real_t           macro_vertex_coord_id_2comp1,
+                                                                std::shared_ptr< SparseMatrixProxy > mat,
+                                                                int64_t          micro_edges_per_macro_edge,
+                                                                real_t           micro_edges_per_macro_edge_float,
+                                                                real_t           radRayVertex,
+                                                                real_t           radRefVertex,
+                                                                real_t           rayVertex_0,
+                                                                real_t           rayVertex_1,
+                                                                real_t           refVertex_0,
+                                                                real_t           refVertex_1,
+                                                                real_t           thrVertex_0,
+                                                                real_t           thrVertex_1 ) const;
+
+   /// Integral: P2ElementwiseSupgDiffusionAnnulusMap
+   /// - volume element:  triangle, dim: 2, vertices: 3, spacedim: 2
+   /// - kernel type:     computeInverseDiagonalOperatorValues
+   /// - loop strategy:   SAWTOOTH
+   /// - quadrature rule: Dunavant 3 | points: 4, degree: 3
+   /// - blending map:    AnnulusMap
+   /// - operations per element:
+   ///   adds    muls    divs    pows    abs    assignments    function_calls    unknown_ops
+   /// ------  ------  ------  ------  -----  -------------  ----------------  -------------
+   ///    990    1600      20      12      0              0                 0              1
+   void computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D(
+       real_t* RESTRICT _data_diffusivity_times_deltaEdge,
+       real_t* RESTRICT _data_diffusivity_times_deltaVertex,
+       real_t* RESTRICT _data_invDiag_Edge,
+       real_t* RESTRICT _data_invDiag_Vertex,
+       real_t* RESTRICT _data_wxEdge,
+       real_t* RESTRICT _data_wxVertex,
+       real_t* RESTRICT _data_wyEdge,
+       real_t* RESTRICT _data_wyVertex,
+       real_t           macro_vertex_coord_id_0comp0,
+       real_t           macro_vertex_coord_id_0comp1,
+       real_t           macro_vertex_coord_id_1comp0,
+       real_t           macro_vertex_coord_id_1comp1,
+       real_t           macro_vertex_coord_id_2comp0,
+       real_t           macro_vertex_coord_id_2comp1,
+       int64_t          micro_edges_per_macro_edge,
+       real_t           micro_edges_per_macro_edge_float,
+       real_t           radRayVertex,
+       real_t           radRefVertex,
+       real_t           rayVertex_0,
+       real_t           rayVertex_1,
+       real_t           refVertex_0,
+       real_t           refVertex_1,
+       real_t           thrVertex_0,
+       real_t           thrVertex_1 ) const;
+
+   // Inverse-diagonal function handed out by getInverseDiagonalValues().
+   std::shared_ptr< P2Function< real_t > > invDiag_;
+   // Coefficient functions held by value.
+   P2Function< real_t > diffusivity_times_delta;
+   P2Function< real_t > wx;
+   P2Function< real_t > wy;
+};
+
+} // namespace operatorgeneration
+
+} // namespace hyteg
diff --git a/operators/supg_diffusion/avx/P2ElementwiseSupgDiffusionAnnulusMap_apply_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp b/operators/supg_diffusion/avx/P2ElementwiseSupgDiffusionAnnulusMap_apply_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..24934c34ccb1413b6f7370788f96488c88c6b4e6
--- /dev/null
+++ b/operators/supg_diffusion/avx/P2ElementwiseSupgDiffusionAnnulusMap_apply_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp
@@ -0,0 +1,1331 @@
+/*
+* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm.
+*
+* This file is part of HyTeG
+* (see https://i10git.cs.fau.de/hyteg/hyteg).
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+* The entire file was generated with the HyTeG Operator Generator.
+*
+* Avoid modifying this file. If buggy, consider fixing the generator itself.
+*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseSupgDiffusionAnnulusMap.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseSupgDiffusionAnnulusMap::apply_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D( real_t * RESTRICT _data_diffusivity_times_deltaEdge, real_t * RESTRICT _data_diffusivity_times_deltaVertex, real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_wxEdge, real_t * RESTRICT _data_wxVertex, real_t * RESTRICT _data_wyEdge, real_t * RESTRICT _data_wyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + 
tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; + const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_8 = -tmp_qloop_7; + const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); + const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; + const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_11*1.0; + const real_t tmp_qloop_37 = jac_affine_inv_0_0_GRAY*4.0; + const real_t tmp_qloop_38 = jac_affine_inv_1_0_GRAY*4.0; + const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38; + const real_t 
tmp_qloop_40 = jac_affine_inv_0_0_GRAY*tmp_qloop_39 + jac_affine_inv_1_0_GRAY*tmp_qloop_39; + const real_t tmp_qloop_41 = jac_affine_inv_0_1_GRAY*4.0; + const real_t tmp_qloop_42 = jac_affine_inv_1_1_GRAY*4.0; + const real_t tmp_qloop_43 = tmp_qloop_41 + tmp_qloop_42; + const real_t tmp_qloop_44 = jac_affine_inv_0_0_GRAY*tmp_qloop_43 + jac_affine_inv_1_0_GRAY*tmp_qloop_43; + const real_t tmp_qloop_45 = jac_affine_inv_0_1_GRAY*tmp_qloop_39 + jac_affine_inv_1_1_GRAY*tmp_qloop_39; + const real_t tmp_qloop_46 = jac_affine_inv_0_1_GRAY*tmp_qloop_43 + jac_affine_inv_1_1_GRAY*tmp_qloop_43; + const real_t tmp_qloop_83 = jac_affine_inv_0_1_GRAY*tmp_qloop_37; + const real_t tmp_qloop_84 = (jac_affine_inv_0_0_GRAY*jac_affine_inv_0_0_GRAY)*4.0; + const real_t tmp_qloop_85 = (jac_affine_inv_0_1_GRAY*jac_affine_inv_0_1_GRAY)*4.0; + const real_t tmp_qloop_90 = jac_affine_inv_1_1_GRAY*tmp_qloop_38; + const real_t tmp_qloop_91 = (jac_affine_inv_1_0_GRAY*jac_affine_inv_1_0_GRAY)*4.0; + const real_t tmp_qloop_92 = (jac_affine_inv_1_1_GRAY*jac_affine_inv_1_1_GRAY)*4.0; + const real_t tmp_qloop_97 = jac_affine_inv_1_0_GRAY*8.0; + const real_t tmp_qloop_98 = jac_affine_inv_0_0_GRAY*tmp_qloop_97; + const real_t tmp_qloop_99 = jac_affine_inv_1_1_GRAY*tmp_qloop_37; + const real_t tmp_qloop_100 = jac_affine_inv_0_1_GRAY*tmp_qloop_38; + const real_t tmp_qloop_101 = tmp_qloop_100 + tmp_qloop_99; + const real_t tmp_qloop_102 = jac_affine_inv_1_1_GRAY*8.0; + const real_t tmp_qloop_103 = jac_affine_inv_0_1_GRAY*tmp_qloop_102; + const real_t tmp_qloop_111 = jac_affine_inv_1_0_GRAY*tmp_qloop_37; + const real_t tmp_qloop_112 = -tmp_qloop_37 - tmp_qloop_97; + const real_t tmp_qloop_113 = jac_affine_inv_1_0_GRAY*tmp_qloop_112 - tmp_qloop_111; + const real_t tmp_qloop_114 = -tmp_qloop_102 - tmp_qloop_41; + const real_t tmp_qloop_115 = jac_affine_inv_1_0_GRAY*tmp_qloop_114 - tmp_qloop_99; + const real_t tmp_qloop_116 = jac_affine_inv_1_1_GRAY*tmp_qloop_112 - tmp_qloop_100; + const real_t tmp_qloop_117 
= jac_affine_inv_1_1_GRAY*tmp_qloop_41; + const real_t tmp_qloop_118 = jac_affine_inv_1_1_GRAY*tmp_qloop_114 - tmp_qloop_117; + const real_t tmp_qloop_123 = jac_affine_inv_0_0_GRAY*-8.0 - tmp_qloop_38; + const real_t tmp_qloop_124 = jac_affine_inv_0_0_GRAY*tmp_qloop_123 - tmp_qloop_111; + const real_t tmp_qloop_125 = jac_affine_inv_0_1_GRAY*-8.0 - tmp_qloop_42; + const real_t tmp_qloop_126 = jac_affine_inv_0_0_GRAY*tmp_qloop_125 - tmp_qloop_100; + const real_t tmp_qloop_127 = jac_affine_inv_0_1_GRAY*tmp_qloop_123 - tmp_qloop_99; + const real_t tmp_qloop_128 = jac_affine_inv_0_1_GRAY*tmp_qloop_125 - tmp_qloop_117; + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& 
_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d diffusivity_times_delta_dof_0 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d diffusivity_times_delta_dof_1 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d diffusivity_times_delta_dof_2 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d diffusivity_times_delta_dof_3 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d diffusivity_times_delta_dof_4 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d diffusivity_times_delta_dof_5 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d wx_dof_0 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d wx_dof_1 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d wx_dof_2 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d wx_dof_3 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d wx_dof_4 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + 
const __m256d wx_dof_5 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d wy_dof_0 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d wy_dof_1 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d wy_dof_2 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d wy_dof_3 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d wy_dof_4 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d wy_dof_5 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + 
__m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0); + const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0); + const __m256d tmp_qloop_3 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1); + const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3); + const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4); + const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5)); + const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5)); + const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)); + const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + 
const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18); + const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24); + const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3); + const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30); + const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3); + const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3)); + const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_47 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_48 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + 
const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_47),tmp_qloop_48); + const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_49,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_49,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY))); + const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_49,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_49,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))); + const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_70 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_72 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_71); + const __m256d tmp_qloop_73 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_74 = _mm256_mul_pd(tmp_qloop_73,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_74); + const __m256d tmp_qloop_76 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_48); + const __m256d tmp_qloop_77 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_47); + const __m256d tmp_qloop_78 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_69),tmp_qloop_71),tmp_qloop_74); + const __m256d tmp_qloop_79 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,wx_dof_3),_mm256_mul_pd(tmp_qloop_72,wx_dof_1)),_mm256_mul_pd(tmp_qloop_75,wx_dof_2)),_mm256_mul_pd(tmp_qloop_76,wx_dof_4)),_mm256_mul_pd(tmp_qloop_77,wx_dof_5)),_mm256_mul_pd(tmp_qloop_78,wx_dof_0)); + const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,wy_dof_3),_mm256_mul_pd(tmp_qloop_72,wy_dof_1)),_mm256_mul_pd(tmp_qloop_75,wy_dof_2)),_mm256_mul_pd(tmp_qloop_76,wy_dof_4)),_mm256_mul_pd(tmp_qloop_77,wy_dof_5)),_mm256_mul_pd(tmp_qloop_78,wy_dof_0)); + const __m256d tmp_qloop_86 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_47); + const __m256d tmp_qloop_87 = _mm256_mul_pd(tmp_qloop_86,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)); + const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_86,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)); + const __m256d tmp_qloop_93 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_48); + const 
__m256d tmp_qloop_94 = _mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)); + const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)); + const __m256d tmp_qloop_104 = _mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)); + const __m256d tmp_qloop_105 = _mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)); + const __m256d tmp_qloop_106 = _mm256_add_pd(tmp_qloop_104,tmp_qloop_105); + const __m256d tmp_qloop_107 = _mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)); + const __m256d tmp_qloop_108 = _mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)); + const __m256d tmp_qloop_109 = _mm256_add_pd(tmp_qloop_107,tmp_qloop_108); + const __m256d tmp_qloop_119 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_120 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_119,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY))); + const __m256d tmp_qloop_121 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_108,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_119,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))); + const __m256d tmp_qloop_129 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_130 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_104,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_129,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY))); + const __m256d tmp_qloop_131 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_129,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))); + const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4)); + const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21); + const __m256d abs_det_jac_blending = tmp_qloop_21; + const __m256d tmp_qloop_81 = 
_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(diffusivity_times_delta_dof_0,tmp_qloop_78),_mm256_mul_pd(diffusivity_times_delta_dof_1,tmp_qloop_72)),_mm256_mul_pd(diffusivity_times_delta_dof_2,tmp_qloop_75)),_mm256_mul_pd(diffusivity_times_delta_dof_3,tmp_qloop_69)),_mm256_mul_pd(diffusivity_times_delta_dof_4,tmp_qloop_76)),_mm256_mul_pd(diffusivity_times_delta_dof_5,tmp_qloop_77))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22); + const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22); + const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_81,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_50),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_54))),_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_50),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_54))))); + const __m256d tmp_qloop_133 = _mm256_mul_pd(tmp_qloop_81,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_87),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_88))),_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_87),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_88))))); + const __m256d tmp_qloop_134 = 
_mm256_mul_pd(tmp_qloop_81,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_94),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_95))),_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_94),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_95))))); + const __m256d tmp_qloop_135 = _mm256_mul_pd(tmp_qloop_81,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_106),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_109))),_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_106),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_109))))); + const __m256d tmp_qloop_136 = _mm256_mul_pd(tmp_qloop_81,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_120),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_121))),_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_120),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_121))))); + const __m256d tmp_qloop_137 = _mm256_mul_pd(tmp_qloop_81,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_130),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_131))),_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_130),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_131))))); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28); + const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d hessian_blending_0_0_1 = 
_mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11))); + const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_0,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_0,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_60 = 
_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_0,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_0,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36); + const __m256d tmp_qloop_56 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_0,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_0,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_0,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_0,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36); + const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_1,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_1,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_51),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_52)); + const __m256d tmp_qloop_55 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_51),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_52)); + const __m256d tmp_qloop_61 = 
_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_1,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_1,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_62 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_60),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_61)); + const __m256d tmp_qloop_63 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_60),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_61)); + const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d tmp_qloop_57 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_1,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_1,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_57)); + const __m256d tmp_qloop_59 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_57)); + const __m256d tmp_qloop_65 = 
_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_1,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_1,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_66 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_64),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_65)); + const __m256d tmp_qloop_67 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_64),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_65)); + const __m256d tmp_qloop_68 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_44,tmp_qloop_44,tmp_qloop_44,tmp_qloop_44)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_40,tmp_qloop_40,tmp_qloop_40,tmp_qloop_40)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_44,tmp_qloop_44,tmp_qloop_44,tmp_qloop_44))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_45,tmp_qloop_45,tmp_qloop_45,tmp_qloop_45)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_45,tmp_qloop_45,tmp_qloop_45,tmp_qloop_45)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_46,tmp_qloop_46,tmp_qloop_46,tmp_qloop_46))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_50,tmp_qloop_53),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_55)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_50,tmp_qloop_58),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_59)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd
(_mm256_mul_pd(tmp_qloop_50,tmp_qloop_62),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_63)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_50,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_67)))); + const __m256d tmp_qloop_89 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_83,tmp_qloop_83,tmp_qloop_83,tmp_qloop_83)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_85,tmp_qloop_85,tmp_qloop_85,tmp_qloop_85)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_83,tmp_qloop_83,tmp_qloop_83,tmp_qloop_83)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_85,tmp_qloop_85,tmp_qloop_85,tmp_qloop_85))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_83,tmp_qloop_83,tmp_qloop_83,tmp_qloop_83)),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_84,tmp_qloop_84,tmp_qloop_84,tmp_qloop_84))))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_83,tmp_qloop_83,tmp_qloop_83,tmp_qloop_83)),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_84,tmp_qloop_84,tmp_qloop_84,tmp_qloop_84))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,tmp_qloop_87),_mm256_mul_pd(tmp_qloop_55,tmp_qloop_88)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_58,tmp_qloop_87),_mm256_mul_pd(tmp_qloop_59,tmp_qloop_88)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,tmp_qloop_87),_mm256_mul_pd(tmp_qloop_63,tmp_qloop_88)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,tmp_qloop_87),_mm256_mul_pd(tmp_qloop_67,tmp_qloop_88)))); + const __m256d tmp_qloop_96 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_90,tmp_qloop_90,tmp_qloop_90,tmp_qloop_90)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_92,tmp_qloop_92,tmp_qloop_92,tmp_qloop_92)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_90,tmp_qloop_90,tmp_qloop_90,tmp_qloop_90)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_92,tmp_qloop_92,tmp_qloop_92,tmp_qloop_92))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_90,tmp_qloop_90,tmp_qloop_90,tmp_qloop_90)),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_91,tmp_qloop_91,tmp_qloop_91,tmp_qloop_91))))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_90,tmp_qloop_90,tmp_qloop_90,tmp_qloop_90)),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_91,tmp_qloop_91,tmp_qloop_91,tmp_qloop_91))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,tmp_qloop_94),_mm256_mul_pd(tmp_qloop_55,tmp_qloop_95)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_58,tmp_qloop_94),_mm256_mul_pd(tmp_qloop_59,tmp_qloop_95)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,tmp_qloop_94),_mm256_mul_pd(tmp_qloop_63,tmp_qloop_95)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,tmp_qloop_94),_mm256_mul_pd(tmp_qloop_67,tmp_qloop_95)))); + const __m256d tmp_qloop_110 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_101,tmp_qloop_101,tmp_qloop_101,tmp_qloop_101)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_103,tmp_qloop_103,tmp_qloop_103,tmp_qloop_103)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_101,tmp_qloop_101,tmp_qloop_101,tmp_qloop_101)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_103,tmp_qloop_103,tmp_qloop_103,tmp_qloop_103))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_101,tmp_qloop_101,tmp_qloop_101,tmp_qloop_101)),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_98,tmp_qloop_98,tmp_qloop_98,tmp_qloop_98))))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_101,tmp_qloop_101,tmp_qloop_101,tmp_qloop_101)),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_98,tmp_qloop_98,tmp_qloop_98,tmp_qloop_98))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_106,tmp_qloop_53),_mm256_mul_pd(tmp_qloop_109,tmp_qloop_55)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_106,tmp_qloop_58),_mm256_mul_pd(tmp_qloop_109,tmp_qloop_59)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_106,tmp_qloop_62),_mm256_mul_pd(tmp_qloop_109,tmp_qloop_63)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_106,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_109,tmp_qloop_67)))); + const __m256d tmp_qloop_122 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_113,tmp_qloop_113,tmp_qloop_113,tmp_qloop_113)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_115,tmp_qloop_115,tmp_qloop_115,tmp_qloop_115)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_113,tmp_qloop_113,tmp_qloop_113,tmp_qloop_113)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_115,tmp_qloop_115,tmp_qloop_115,tmp_qloop_115))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_116,tmp_qloop_116,tmp_qloop_116,tmp_qloop_116)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_118,tmp_qloop_118,tmp_qloop_118,tmp_qloop_118))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_116,tmp_qloop_116,tmp_qloop_116,tmp_qloop_116)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_118,tmp_qloop_118,tmp_qloop_118,tmp_qloop_118))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_120,tmp_qloop_53),_mm256_mul_pd(tmp_qloop_121,tmp_qloop_55)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_120,tmp_qloop_58),_mm256_mul_pd(tmp_qloop_121,tmp_qloop_59)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_120,tmp_qloop_62),_mm256_mul_pd(tmp_qloop_121,tmp_qloop_63)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_120,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_121,tmp_qloop_67)))); + const __m256d tmp_qloop_132 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_124,tmp_qloop_124,tmp_qloop_124,tmp_qloop_124)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_126,tmp_qloop_126,tmp_qloop_126,tmp_qloop_126)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_124,tmp_qloop_124,tmp_qloop_124,tmp_qloop_124)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_126,tmp_qloop_126,tmp_qloop_126,tmp_qloop_126))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_127,tmp_qloop_127,tmp_qloop_127,tmp_qloop_127)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_128,tmp_qloop_128,tmp_qloop_128,tmp_qloop_128))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_127,tmp_qloop_127,tmp_qloop_127,tmp_qloop_127)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_128,tmp_qloop_128,tmp_qloop_128,tmp_qloop_128))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_130,tmp_qloop_53),_mm256_mul_pd(tmp_qloop_131,tmp_qloop_55)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_130,tmp_qloop_58),_mm256_mul_pd(tmp_qloop_131,tmp_qloop_59)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_130,tmp_qloop_62),_mm256_mul_pd(tmp_qloop_131,tmp_qloop_63)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_130,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_131,tmp_qloop_67)))); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_68,tmp_qloop_82); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_82,tmp_qloop_89); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_82,tmp_qloop_96); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_82); + const 
__m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_122,tmp_qloop_82); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_82); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_68); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_89); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_96); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_133); + const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_122,tmp_qloop_133); + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_133); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_68); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_89); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_96); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_134); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_122,tmp_qloop_134); + const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_134); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_68); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_89); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_96); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_135); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_122,tmp_qloop_135); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_135); + const __m256d q_tmp_4_0 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_68); + const __m256d q_tmp_4_1 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_89); + const __m256d q_tmp_4_2 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_96); + const __m256d q_tmp_4_3 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_136); + const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_122,tmp_qloop_136); + const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_136); + const __m256d q_tmp_5_0 = _mm256_mul_pd(tmp_qloop_137,tmp_qloop_68); + const __m256d q_tmp_5_1 = _mm256_mul_pd(tmp_qloop_137,tmp_qloop_89); + const 
__m256d q_tmp_5_2 = _mm256_mul_pd(tmp_qloop_137,tmp_qloop_96); + const __m256d q_tmp_5_3 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_137); + const __m256d q_tmp_5_4 = _mm256_mul_pd(tmp_qloop_122,tmp_qloop_137); + const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_137); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0); + q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1); + q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2); + q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3); + q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4); + q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5); + q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0); + q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1); + q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2); + q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3); + q_acc_5_4 = 
_mm256_add_pd(q_acc_5_4,q_tmp_5_4); + q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)); + const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5)); + const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5)); + 
_mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const 
int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge 
+ 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wx_dof_1 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_3 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; 
+ const real_t wy_dof_1 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_3 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + 
p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_47 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_48 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_49 = tmp_qloop_47 + tmp_qloop_48 - 3.0; + const real_t tmp_qloop_50 = jac_affine_inv_0_0_GRAY*tmp_qloop_49 + jac_affine_inv_1_0_GRAY*tmp_qloop_49; + const real_t tmp_qloop_54 = 
jac_affine_inv_0_1_GRAY*tmp_qloop_49 + jac_affine_inv_1_1_GRAY*tmp_qloop_49; + const real_t tmp_qloop_69 = tmp_qloop_47*_data_q_p_1[q]; + const real_t tmp_qloop_70 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_71 = tmp_qloop_70*2.0; + const real_t tmp_qloop_72 = tmp_qloop_71 - _data_q_p_0[q]; + const real_t tmp_qloop_73 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_74 = tmp_qloop_73*2.0; + const real_t tmp_qloop_75 = tmp_qloop_74 - _data_q_p_1[q]; + const real_t tmp_qloop_76 = tmp_qloop_48 - tmp_qloop_69 + tmp_qloop_73*-4.0; + const real_t tmp_qloop_77 = tmp_qloop_47 - tmp_qloop_69 + tmp_qloop_70*-4.0; + const real_t tmp_qloop_78 = tmp_qloop_69 + tmp_qloop_71 + tmp_qloop_74 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_79 = tmp_qloop_69*wx_dof_3 + tmp_qloop_72*wx_dof_1 + tmp_qloop_75*wx_dof_2 + tmp_qloop_76*wx_dof_4 + tmp_qloop_77*wx_dof_5 + tmp_qloop_78*wx_dof_0; + const real_t tmp_qloop_80 = tmp_qloop_69*wy_dof_3 + tmp_qloop_72*wy_dof_1 + tmp_qloop_75*wy_dof_2 + tmp_qloop_76*wy_dof_4 + tmp_qloop_77*wy_dof_5 + tmp_qloop_78*wy_dof_0; + const real_t tmp_qloop_86 = tmp_qloop_47 - 1.0; + const real_t tmp_qloop_87 = jac_affine_inv_0_0_GRAY*tmp_qloop_86; + const real_t tmp_qloop_88 = jac_affine_inv_0_1_GRAY*tmp_qloop_86; + const real_t tmp_qloop_93 = tmp_qloop_48 - 1.0; + const real_t tmp_qloop_94 = jac_affine_inv_1_0_GRAY*tmp_qloop_93; + const real_t tmp_qloop_95 = jac_affine_inv_1_1_GRAY*tmp_qloop_93; + const real_t tmp_qloop_104 = jac_affine_inv_1_0_GRAY*tmp_qloop_47; + const real_t tmp_qloop_105 = jac_affine_inv_0_0_GRAY*tmp_qloop_48; + const real_t tmp_qloop_106 = tmp_qloop_104 + tmp_qloop_105; + const real_t tmp_qloop_107 = jac_affine_inv_1_1_GRAY*tmp_qloop_47; + const real_t tmp_qloop_108 = jac_affine_inv_0_1_GRAY*tmp_qloop_48; + const real_t tmp_qloop_109 = tmp_qloop_107 + tmp_qloop_108; + const real_t tmp_qloop_119 = -tmp_qloop_47 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_120 = 
jac_affine_inv_1_0_GRAY*tmp_qloop_119 - tmp_qloop_105; + const real_t tmp_qloop_121 = jac_affine_inv_1_1_GRAY*tmp_qloop_119 - tmp_qloop_108; + const real_t tmp_qloop_129 = -tmp_qloop_48 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_130 = jac_affine_inv_0_0_GRAY*tmp_qloop_129 - tmp_qloop_104; + const real_t tmp_qloop_131 = jac_affine_inv_0_1_GRAY*tmp_qloop_129 - tmp_qloop_107; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_81 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(diffusivity_times_delta_dof_0*tmp_qloop_78 + diffusivity_times_delta_dof_1*tmp_qloop_72 + diffusivity_times_delta_dof_2*tmp_qloop_75 + diffusivity_times_delta_dof_3*tmp_qloop_69 + diffusivity_times_delta_dof_4*tmp_qloop_76 + diffusivity_times_delta_dof_5*tmp_qloop_77)*_data_q_w[q]; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t tmp_qloop_82 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_50 + jac_blending_inv_1_0*tmp_qloop_54) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_50 + jac_blending_inv_1_1*tmp_qloop_54)); + const real_t tmp_qloop_133 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_87 + jac_blending_inv_1_0*tmp_qloop_88) + 
tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_87 + jac_blending_inv_1_1*tmp_qloop_88)); + const real_t tmp_qloop_134 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_94 + jac_blending_inv_1_0*tmp_qloop_95) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_94 + jac_blending_inv_1_1*tmp_qloop_95)); + const real_t tmp_qloop_135 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_106 + jac_blending_inv_1_0*tmp_qloop_109) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_106 + jac_blending_inv_1_1*tmp_qloop_109)); + const real_t tmp_qloop_136 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_120 + jac_blending_inv_1_0*tmp_qloop_121) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_120 + jac_blending_inv_1_1*tmp_qloop_121)); + const real_t tmp_qloop_137 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_130 + jac_blending_inv_1_0*tmp_qloop_131) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_130 + jac_blending_inv_1_1*tmp_qloop_131)); + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t tmp_qloop_51 = -hessian_blending_0_0_0*jac_blending_inv_0_0 - hessian_blending_0_1_0*jac_blending_inv_1_0; + const real_t tmp_qloop_60 = -hessian_blending_0_0_0*jac_blending_inv_0_1 - hessian_blending_0_1_0*jac_blending_inv_1_1; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t tmp_qloop_56 = 
-hessian_blending_1_0_0*jac_blending_inv_0_0 - hessian_blending_1_1_0*jac_blending_inv_1_0; + const real_t tmp_qloop_64 = -hessian_blending_1_0_0*jac_blending_inv_0_1 - hessian_blending_1_1_0*jac_blending_inv_1_1; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t tmp_qloop_52 = -hessian_blending_0_0_1*jac_blending_inv_0_0 - hessian_blending_0_1_1*jac_blending_inv_1_0; + const real_t tmp_qloop_53 = jac_blending_inv_0_0*tmp_qloop_51 + jac_blending_inv_0_1*tmp_qloop_52; + const real_t tmp_qloop_55 = jac_blending_inv_1_0*tmp_qloop_51 + jac_blending_inv_1_1*tmp_qloop_52; + const real_t tmp_qloop_61 = -hessian_blending_0_0_1*jac_blending_inv_0_1 - hessian_blending_0_1_1*jac_blending_inv_1_1; + const real_t tmp_qloop_62 = jac_blending_inv_0_0*tmp_qloop_60 + jac_blending_inv_0_1*tmp_qloop_61; + const real_t tmp_qloop_63 = jac_blending_inv_1_0*tmp_qloop_60 + jac_blending_inv_1_1*tmp_qloop_61; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t tmp_qloop_57 = -hessian_blending_1_0_1*jac_blending_inv_0_0 - hessian_blending_1_1_1*jac_blending_inv_1_0; + const real_t tmp_qloop_58 = jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_0_1*tmp_qloop_57; + const real_t tmp_qloop_59 = jac_blending_inv_1_0*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57; + const real_t tmp_qloop_65 = -hessian_blending_1_0_1*jac_blending_inv_0_1 - hessian_blending_1_1_1*jac_blending_inv_1_1; + const real_t tmp_qloop_66 = jac_blending_inv_0_0*tmp_qloop_64 + jac_blending_inv_0_1*tmp_qloop_65; + const real_t tmp_qloop_67 = jac_blending_inv_1_0*tmp_qloop_64 + jac_blending_inv_1_1*tmp_qloop_65; + const real_t tmp_qloop_68 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_44) + 
jac_blending_inv_0_0*(tmp_qloop_50*tmp_qloop_53 + tmp_qloop_54*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_44) + jac_blending_inv_0_1*(tmp_qloop_50*tmp_qloop_58 + tmp_qloop_54*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_45 + jac_blending_inv_1_0*tmp_qloop_46) + jac_blending_inv_1_0*(tmp_qloop_50*tmp_qloop_62 + tmp_qloop_54*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_45 + jac_blending_inv_1_1*tmp_qloop_46) + jac_blending_inv_1_1*(tmp_qloop_50*tmp_qloop_66 + tmp_qloop_54*tmp_qloop_67); + const real_t tmp_qloop_89 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_84 + jac_blending_inv_1_0*tmp_qloop_83) + jac_blending_inv_0_0*(tmp_qloop_53*tmp_qloop_87 + tmp_qloop_55*tmp_qloop_88) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_84 + jac_blending_inv_1_1*tmp_qloop_83) + jac_blending_inv_0_1*(tmp_qloop_58*tmp_qloop_87 + tmp_qloop_59*tmp_qloop_88) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_83 + jac_blending_inv_1_0*tmp_qloop_85) + jac_blending_inv_1_0*(tmp_qloop_62*tmp_qloop_87 + tmp_qloop_63*tmp_qloop_88) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_83 + jac_blending_inv_1_1*tmp_qloop_85) + jac_blending_inv_1_1*(tmp_qloop_66*tmp_qloop_87 + tmp_qloop_67*tmp_qloop_88); + const real_t tmp_qloop_96 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_91 + jac_blending_inv_1_0*tmp_qloop_90) + jac_blending_inv_0_0*(tmp_qloop_53*tmp_qloop_94 + tmp_qloop_55*tmp_qloop_95) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_91 + jac_blending_inv_1_1*tmp_qloop_90) + jac_blending_inv_0_1*(tmp_qloop_58*tmp_qloop_94 + tmp_qloop_59*tmp_qloop_95) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_90 + jac_blending_inv_1_0*tmp_qloop_92) + jac_blending_inv_1_0*(tmp_qloop_62*tmp_qloop_94 + tmp_qloop_63*tmp_qloop_95) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_90 + jac_blending_inv_1_1*tmp_qloop_92) + 
jac_blending_inv_1_1*(tmp_qloop_66*tmp_qloop_94 + tmp_qloop_67*tmp_qloop_95); + const real_t tmp_qloop_110 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_98 + jac_blending_inv_1_0*tmp_qloop_101) + jac_blending_inv_0_0*(tmp_qloop_106*tmp_qloop_53 + tmp_qloop_109*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_98 + jac_blending_inv_1_1*tmp_qloop_101) + jac_blending_inv_0_1*(tmp_qloop_106*tmp_qloop_58 + tmp_qloop_109*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_101 + jac_blending_inv_1_0*tmp_qloop_103) + jac_blending_inv_1_0*(tmp_qloop_106*tmp_qloop_62 + tmp_qloop_109*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_101 + jac_blending_inv_1_1*tmp_qloop_103) + jac_blending_inv_1_1*(tmp_qloop_106*tmp_qloop_66 + tmp_qloop_109*tmp_qloop_67); + const real_t tmp_qloop_122 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_113 + jac_blending_inv_1_0*tmp_qloop_115) + jac_blending_inv_0_0*(tmp_qloop_120*tmp_qloop_53 + tmp_qloop_121*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_113 + jac_blending_inv_1_1*tmp_qloop_115) + jac_blending_inv_0_1*(tmp_qloop_120*tmp_qloop_58 + tmp_qloop_121*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_116 + jac_blending_inv_1_0*tmp_qloop_118) + jac_blending_inv_1_0*(tmp_qloop_120*tmp_qloop_62 + tmp_qloop_121*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_116 + jac_blending_inv_1_1*tmp_qloop_118) + jac_blending_inv_1_1*(tmp_qloop_120*tmp_qloop_66 + tmp_qloop_121*tmp_qloop_67); + const real_t tmp_qloop_132 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_124 + jac_blending_inv_1_0*tmp_qloop_126) + jac_blending_inv_0_0*(tmp_qloop_130*tmp_qloop_53 + tmp_qloop_131*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_124 + jac_blending_inv_1_1*tmp_qloop_126) + jac_blending_inv_0_1*(tmp_qloop_130*tmp_qloop_58 + tmp_qloop_131*tmp_qloop_59) + 
jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_127 + jac_blending_inv_1_0*tmp_qloop_128) + jac_blending_inv_1_0*(tmp_qloop_130*tmp_qloop_62 + tmp_qloop_131*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_127 + jac_blending_inv_1_1*tmp_qloop_128) + jac_blending_inv_1_1*(tmp_qloop_130*tmp_qloop_66 + tmp_qloop_131*tmp_qloop_67); + const real_t q_tmp_0_0 = tmp_qloop_68*tmp_qloop_82; + const real_t q_tmp_0_1 = tmp_qloop_82*tmp_qloop_89; + const real_t q_tmp_0_2 = tmp_qloop_82*tmp_qloop_96; + const real_t q_tmp_0_3 = tmp_qloop_110*tmp_qloop_82; + const real_t q_tmp_0_4 = tmp_qloop_122*tmp_qloop_82; + const real_t q_tmp_0_5 = tmp_qloop_132*tmp_qloop_82; + const real_t q_tmp_1_0 = tmp_qloop_133*tmp_qloop_68; + const real_t q_tmp_1_1 = tmp_qloop_133*tmp_qloop_89; + const real_t q_tmp_1_2 = tmp_qloop_133*tmp_qloop_96; + const real_t q_tmp_1_3 = tmp_qloop_110*tmp_qloop_133; + const real_t q_tmp_1_4 = tmp_qloop_122*tmp_qloop_133; + const real_t q_tmp_1_5 = tmp_qloop_132*tmp_qloop_133; + const real_t q_tmp_2_0 = tmp_qloop_134*tmp_qloop_68; + const real_t q_tmp_2_1 = tmp_qloop_134*tmp_qloop_89; + const real_t q_tmp_2_2 = tmp_qloop_134*tmp_qloop_96; + const real_t q_tmp_2_3 = tmp_qloop_110*tmp_qloop_134; + const real_t q_tmp_2_4 = tmp_qloop_122*tmp_qloop_134; + const real_t q_tmp_2_5 = tmp_qloop_132*tmp_qloop_134; + const real_t q_tmp_3_0 = tmp_qloop_135*tmp_qloop_68; + const real_t q_tmp_3_1 = tmp_qloop_135*tmp_qloop_89; + const real_t q_tmp_3_2 = tmp_qloop_135*tmp_qloop_96; + const real_t q_tmp_3_3 = tmp_qloop_110*tmp_qloop_135; + const real_t q_tmp_3_4 = tmp_qloop_122*tmp_qloop_135; + const real_t q_tmp_3_5 = tmp_qloop_132*tmp_qloop_135; + const real_t q_tmp_4_0 = tmp_qloop_136*tmp_qloop_68; + const real_t q_tmp_4_1 = tmp_qloop_136*tmp_qloop_89; + const real_t q_tmp_4_2 = tmp_qloop_136*tmp_qloop_96; + const real_t q_tmp_4_3 = tmp_qloop_110*tmp_qloop_136; + const real_t q_tmp_4_4 = tmp_qloop_122*tmp_qloop_136; + const real_t q_tmp_4_5 = 
tmp_qloop_132*tmp_qloop_136; + const real_t q_tmp_5_0 = tmp_qloop_137*tmp_qloop_68; + const real_t q_tmp_5_1 = tmp_qloop_137*tmp_qloop_89; + const real_t q_tmp_5_2 = tmp_qloop_137*tmp_qloop_96; + const real_t q_tmp_5_3 = tmp_qloop_110*tmp_qloop_137; + const real_t q_tmp_5_4 = tmp_qloop_122*tmp_qloop_137; + const real_t q_tmp_5_5 = tmp_qloop_132*tmp_qloop_137; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + 
q_acc_1_5*src_dof_5; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5; + const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5; + const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / 
(2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + } + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + 
const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + const real_t tmp_moved_constant_0 = jac_affine_inv_0_0_BLUE*4.0; + const real_t tmp_moved_constant_1 = jac_affine_inv_1_0_BLUE*4.0; + const real_t tmp_moved_constant_2 = tmp_moved_constant_0 + tmp_moved_constant_1; + const real_t tmp_moved_constant_3 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_2; + const real_t tmp_moved_constant_4 = jac_affine_inv_0_1_BLUE*4.0; + const real_t tmp_moved_constant_5 = jac_affine_inv_1_1_BLUE*4.0; + const real_t tmp_moved_constant_6 = tmp_moved_constant_4 + tmp_moved_constant_5; + const real_t tmp_moved_constant_7 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_6 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_6; + const real_t tmp_moved_constant_8 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_2; + const real_t tmp_moved_constant_9 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_6 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_6; + const real_t tmp_moved_constant_10 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_0; + const real_t tmp_moved_constant_11 = (jac_affine_inv_0_0_BLUE*jac_affine_inv_0_0_BLUE)*4.0; + const real_t tmp_moved_constant_12 = (jac_affine_inv_0_1_BLUE*jac_affine_inv_0_1_BLUE)*4.0; + const real_t tmp_moved_constant_13 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_1; + const real_t tmp_moved_constant_14 = (jac_affine_inv_1_0_BLUE*jac_affine_inv_1_0_BLUE)*4.0; + const real_t tmp_moved_constant_15 = (jac_affine_inv_1_1_BLUE*jac_affine_inv_1_1_BLUE)*4.0; + const real_t tmp_moved_constant_16 = jac_affine_inv_1_0_BLUE*8.0; + const real_t tmp_moved_constant_17 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_16; + const real_t 
tmp_moved_constant_18 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_0; + const real_t tmp_moved_constant_19 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_1; + const real_t tmp_moved_constant_20 = tmp_moved_constant_18 + tmp_moved_constant_19; + const real_t tmp_moved_constant_21 = jac_affine_inv_1_1_BLUE*8.0; + const real_t tmp_moved_constant_22 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_21; + const real_t tmp_moved_constant_23 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_0; + const real_t tmp_moved_constant_24 = -tmp_moved_constant_0 - tmp_moved_constant_16; + const real_t tmp_moved_constant_25 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_24 - tmp_moved_constant_23; + const real_t tmp_moved_constant_26 = -tmp_moved_constant_21 - tmp_moved_constant_4; + const real_t tmp_moved_constant_27 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_26 - tmp_moved_constant_18; + const real_t tmp_moved_constant_28 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_24 - tmp_moved_constant_19; + const real_t tmp_moved_constant_29 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_4; + const real_t tmp_moved_constant_30 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_26 - tmp_moved_constant_29; + const real_t tmp_moved_constant_31 = jac_affine_inv_0_0_BLUE*-8.0 - tmp_moved_constant_1; + const real_t tmp_moved_constant_32 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_31 - tmp_moved_constant_23; + const real_t tmp_moved_constant_33 = jac_affine_inv_0_1_BLUE*-8.0 - tmp_moved_constant_5; + const real_t tmp_moved_constant_34 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_33 - tmp_moved_constant_19; + const real_t tmp_moved_constant_35 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_31 - tmp_moved_constant_18; + const real_t tmp_moved_constant_36 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_33 - tmp_moved_constant_29; + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * 
(4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d diffusivity_times_delta_dof_0 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d diffusivity_times_delta_dof_1 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d diffusivity_times_delta_dof_2 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d diffusivity_times_delta_dof_3 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d diffusivity_times_delta_dof_4 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d diffusivity_times_delta_dof_5 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d wx_dof_0 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d wx_dof_1 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d wx_dof_2 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d wx_dof_3 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 
1)*(ctr_1 + 2)) / (2))]); + const __m256d wx_dof_4 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d wx_dof_5 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d wy_dof_0 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d wy_dof_1 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d wy_dof_2 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d wy_dof_3 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d wy_dof_4 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d wy_dof_5 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = 
_mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0); + const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0); + const __m256d 
tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1); + const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3); + const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4); + const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5)); + const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5)); + const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)); + const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_19 = 
_mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18); + const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24); + const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3); + const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30); + const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3); + const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3)); + const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_47 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_48 = 
_mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_47),tmp_qloop_48); + const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_49,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_49,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE))); + const __m256d tmp_qloop_54 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_49,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_49,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))); + const __m256d tmp_qloop_69 = _mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_70 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_71 = _mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_72 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_71); + const __m256d tmp_qloop_73 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_74 = _mm256_mul_pd(tmp_qloop_73,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_74); + const __m256d 
tmp_qloop_76 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_73,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_48); + const __m256d tmp_qloop_77 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_70,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_69,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_47); + const __m256d tmp_qloop_78 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_69),tmp_qloop_71),tmp_qloop_74); + const __m256d tmp_qloop_79 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,wx_dof_3),_mm256_mul_pd(tmp_qloop_72,wx_dof_1)),_mm256_mul_pd(tmp_qloop_75,wx_dof_2)),_mm256_mul_pd(tmp_qloop_76,wx_dof_4)),_mm256_mul_pd(tmp_qloop_77,wx_dof_5)),_mm256_mul_pd(tmp_qloop_78,wx_dof_0)); + const __m256d tmp_qloop_80 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_69,wy_dof_3),_mm256_mul_pd(tmp_qloop_72,wy_dof_1)),_mm256_mul_pd(tmp_qloop_75,wy_dof_2)),_mm256_mul_pd(tmp_qloop_76,wy_dof_4)),_mm256_mul_pd(tmp_qloop_77,wy_dof_5)),_mm256_mul_pd(tmp_qloop_78,wy_dof_0)); + const __m256d tmp_qloop_86 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_47); + const __m256d tmp_qloop_87 = _mm256_mul_pd(tmp_qloop_86,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)); + const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_86,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)); + const __m256d tmp_qloop_93 = 
_mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_48); + const __m256d tmp_qloop_94 = _mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)); + const __m256d tmp_qloop_95 = _mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)); + const __m256d tmp_qloop_104 = _mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)); + const __m256d tmp_qloop_105 = _mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)); + const __m256d tmp_qloop_106 = _mm256_add_pd(tmp_qloop_104,tmp_qloop_105); + const __m256d tmp_qloop_107 = _mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)); + const __m256d tmp_qloop_108 = _mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)); + const __m256d tmp_qloop_109 = _mm256_add_pd(tmp_qloop_107,tmp_qloop_108); + const __m256d tmp_qloop_119 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_47,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_120 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_105,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_119,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE))); + const __m256d tmp_qloop_121 = 
_mm256_add_pd(_mm256_mul_pd(tmp_qloop_108,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_119,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))); + const __m256d tmp_qloop_129 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_48,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_130 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_104,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_129,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE))); + const __m256d tmp_qloop_131 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_129,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))); + const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4)); + const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_21 = 
_mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21); + const __m256d abs_det_jac_blending = tmp_qloop_21; + const __m256d tmp_qloop_81 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(diffusivity_times_delta_dof_0,tmp_qloop_78),_mm256_mul_pd(diffusivity_times_delta_dof_1,tmp_qloop_72)),_mm256_mul_pd(diffusivity_times_delta_dof_2,tmp_qloop_75)),_mm256_mul_pd(diffusivity_times_delta_dof_3,tmp_qloop_69)),_mm256_mul_pd(diffusivity_times_delta_dof_4,tmp_qloop_76)),_mm256_mul_pd(diffusivity_times_delta_dof_5,tmp_qloop_77))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22); + const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22); + const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_81,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_50),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_54))),_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_50),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_54))))); + const __m256d tmp_qloop_133 = 
_mm256_mul_pd(tmp_qloop_81,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_87),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_88))),_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_87),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_88))))); + const __m256d tmp_qloop_134 = _mm256_mul_pd(tmp_qloop_81,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_94),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_95))),_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_94),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_95))))); + const __m256d tmp_qloop_135 = _mm256_mul_pd(tmp_qloop_81,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_106),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_109))),_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_106),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_109))))); + const __m256d tmp_qloop_136 = _mm256_mul_pd(tmp_qloop_81,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_120),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_121))),_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_120),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_121))))); + const __m256d tmp_qloop_137 = _mm256_mul_pd(tmp_qloop_81,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_79,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_130),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_131))),_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_130),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_131))))); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28); + const __m256d hessian_blending_1_0_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11))); + const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d tmp_qloop_51 = 
_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_0,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_0,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_60 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_0,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_0,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36); + const __m256d tmp_qloop_56 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_0,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_0,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_0,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_0,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36); + const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_1,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_1,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_51),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_52)); + const __m256d 
tmp_qloop_55 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_51),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_52)); + const __m256d tmp_qloop_61 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_1,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_1,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_62 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_60),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_61)); + const __m256d tmp_qloop_63 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_60),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_61)); + const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d tmp_qloop_57 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_1,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_1,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_58 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_57)); + const __m256d tmp_qloop_59 = 
_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_56),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_57)); + const __m256d tmp_qloop_65 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_1,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_1,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_66 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_64),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_65)); + const __m256d tmp_qloop_67 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_64),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_65)); + const __m256d tmp_qloop_68 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_9,tmp_moved_constant_9,tmp_moved_constant_9,tmp_moved_constant_9))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_
moved_constant_9,tmp_moved_constant_9,tmp_moved_constant_9,tmp_moved_constant_9))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_50,tmp_qloop_53),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_55)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_50,tmp_qloop_58),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_59)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_50,tmp_qloop_62),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_63)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_50,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_54,tmp_qloop_67)))); + const __m256d tmp_qloop_89 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_12,tmp_moved_constant_12,tmp_moved_constant_12,tmp_moved_constant_12)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_12,tmp_moved_constant_12,tmp_moved_constant_12,tmp_moved_constant_12))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10)),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11))))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10)),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_11,t
mp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,tmp_qloop_87),_mm256_mul_pd(tmp_qloop_55,tmp_qloop_88)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_58,tmp_qloop_87),_mm256_mul_pd(tmp_qloop_59,tmp_qloop_88)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,tmp_qloop_87),_mm256_mul_pd(tmp_qloop_63,tmp_qloop_88)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,tmp_qloop_87),_mm256_mul_pd(tmp_qloop_67,tmp_qloop_88)))); + const __m256d tmp_qloop_96 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_15,tmp_moved_constant_15,tmp_moved_constant_15,tmp_moved_constant_15)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_15,tmp_moved_constant_15,tmp_moved_constant_15,tmp_moved_constant_15))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13)),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14))))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13)),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_14,tmp_moved_consta
nt_14,tmp_moved_constant_14,tmp_moved_constant_14))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_53,tmp_qloop_94),_mm256_mul_pd(tmp_qloop_55,tmp_qloop_95)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_58,tmp_qloop_94),_mm256_mul_pd(tmp_qloop_59,tmp_qloop_95)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_62,tmp_qloop_94),_mm256_mul_pd(tmp_qloop_63,tmp_qloop_95)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,tmp_qloop_94),_mm256_mul_pd(tmp_qloop_67,tmp_qloop_95)))); + const __m256d tmp_qloop_110 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_17,tmp_moved_constant_17,tmp_moved_constant_17,tmp_moved_constant_17)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_17,tmp_moved_constant_17,tmp_moved_constant_17,tmp_moved_constant_17)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_22,tmp_moved_constant_22,tmp_moved_constant_22,tmp_moved_constant_22))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_22,tmp_moved_constant_22,tmp_move
d_constant_22,tmp_moved_constant_22))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_106,tmp_qloop_53),_mm256_mul_pd(tmp_qloop_109,tmp_qloop_55)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_106,tmp_qloop_58),_mm256_mul_pd(tmp_qloop_109,tmp_qloop_59)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_106,tmp_qloop_62),_mm256_mul_pd(tmp_qloop_109,tmp_qloop_63)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_106,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_109,tmp_qloop_67)))); + const __m256d tmp_qloop_122 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_25,tmp_moved_constant_25,tmp_moved_constant_25,tmp_moved_constant_25)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_27,tmp_moved_constant_27,tmp_moved_constant_27,tmp_moved_constant_27)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_25,tmp_moved_constant_25,tmp_moved_constant_25,tmp_moved_constant_25)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_27,tmp_moved_constant_27,tmp_moved_constant_27,tmp_moved_constant_27))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_28,tmp_moved_constant_28,tmp_moved_constant_28,tmp_moved_constant_28)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_30,tmp_moved_constant_30,tmp_moved_constant_30,tmp_moved_constant_30))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_28,tmp_moved_constant_28,tmp_moved_constant_28,tmp_moved_constant_28)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_30,tmp_moved_constant_30,tmp_moved_cons
tant_30,tmp_moved_constant_30))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_120,tmp_qloop_53),_mm256_mul_pd(tmp_qloop_121,tmp_qloop_55)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_120,tmp_qloop_58),_mm256_mul_pd(tmp_qloop_121,tmp_qloop_59)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_120,tmp_qloop_62),_mm256_mul_pd(tmp_qloop_121,tmp_qloop_63)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_120,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_121,tmp_qloop_67)))); + const __m256d tmp_qloop_132 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_32,tmp_moved_constant_32,tmp_moved_constant_32,tmp_moved_constant_32)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_34,tmp_moved_constant_34,tmp_moved_constant_34,tmp_moved_constant_34)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_32,tmp_moved_constant_32,tmp_moved_constant_32,tmp_moved_constant_32)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_34,tmp_moved_constant_34,tmp_moved_constant_34,tmp_moved_constant_34))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_35,tmp_moved_constant_35,tmp_moved_constant_35,tmp_moved_constant_35)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_36,tmp_moved_constant_36,tmp_moved_constant_36,tmp_moved_constant_36))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_35,tmp_moved_constant_35,tmp_moved_constant_35,tmp_moved_constant_35)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_36,tmp_moved_constant_36,tmp_moved_constant_3
6,tmp_moved_constant_36))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_130,tmp_qloop_53),_mm256_mul_pd(tmp_qloop_131,tmp_qloop_55)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_130,tmp_qloop_58),_mm256_mul_pd(tmp_qloop_131,tmp_qloop_59)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_130,tmp_qloop_62),_mm256_mul_pd(tmp_qloop_131,tmp_qloop_63)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_130,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_131,tmp_qloop_67)))); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_68,tmp_qloop_82); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_82,tmp_qloop_89); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_82,tmp_qloop_96); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_82); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_122,tmp_qloop_82); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_82); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_68); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_89); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_133,tmp_qloop_96); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_133); + const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_122,tmp_qloop_133); + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_133); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_68); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_89); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_134,tmp_qloop_96); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_134); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_122,tmp_qloop_134); + const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_134); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_68); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_89); + 
const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_135,tmp_qloop_96); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_135); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_122,tmp_qloop_135); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_135); + const __m256d q_tmp_4_0 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_68); + const __m256d q_tmp_4_1 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_89); + const __m256d q_tmp_4_2 = _mm256_mul_pd(tmp_qloop_136,tmp_qloop_96); + const __m256d q_tmp_4_3 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_136); + const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_122,tmp_qloop_136); + const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_136); + const __m256d q_tmp_5_0 = _mm256_mul_pd(tmp_qloop_137,tmp_qloop_68); + const __m256d q_tmp_5_1 = _mm256_mul_pd(tmp_qloop_137,tmp_qloop_89); + const __m256d q_tmp_5_2 = _mm256_mul_pd(tmp_qloop_137,tmp_qloop_96); + const __m256d q_tmp_5_3 = _mm256_mul_pd(tmp_qloop_110,tmp_qloop_137); + const __m256d q_tmp_5_4 = _mm256_mul_pd(tmp_qloop_122,tmp_qloop_137); + const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_132,tmp_qloop_137); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + 
q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0); + q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1); + q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2); + q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3); + q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4); + q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5); + q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0); + q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1); + q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2); + q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3); + q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4); + q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)); + const __m256d elMatVec_3 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)); + const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5)); + const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5)); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 
1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t 
p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - 
((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wx_dof_1 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t wx_dof_3 = _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wy_dof_1 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t wy_dof_3 = _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + 
real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = 
tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_47 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_48 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_49 = tmp_qloop_47 + tmp_qloop_48 - 3.0; + const real_t tmp_qloop_50 = jac_affine_inv_0_0_BLUE*tmp_qloop_49 + jac_affine_inv_1_0_BLUE*tmp_qloop_49; + const real_t tmp_qloop_54 = jac_affine_inv_0_1_BLUE*tmp_qloop_49 + jac_affine_inv_1_1_BLUE*tmp_qloop_49; + const real_t tmp_qloop_69 = tmp_qloop_47*_data_q_p_1[q]; + const real_t tmp_qloop_70 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_71 = tmp_qloop_70*2.0; + const real_t tmp_qloop_72 = tmp_qloop_71 - _data_q_p_0[q]; + const real_t tmp_qloop_73 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_74 = tmp_qloop_73*2.0; + const real_t tmp_qloop_75 = tmp_qloop_74 - _data_q_p_1[q]; + const real_t tmp_qloop_76 = tmp_qloop_48 - tmp_qloop_69 + tmp_qloop_73*-4.0; + const real_t tmp_qloop_77 = tmp_qloop_47 - tmp_qloop_69 + tmp_qloop_70*-4.0; + const real_t tmp_qloop_78 = tmp_qloop_69 + tmp_qloop_71 + tmp_qloop_74 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_79 = tmp_qloop_69*wx_dof_3 + tmp_qloop_72*wx_dof_1 + 
tmp_qloop_75*wx_dof_2 + tmp_qloop_76*wx_dof_4 + tmp_qloop_77*wx_dof_5 + tmp_qloop_78*wx_dof_0; + const real_t tmp_qloop_80 = tmp_qloop_69*wy_dof_3 + tmp_qloop_72*wy_dof_1 + tmp_qloop_75*wy_dof_2 + tmp_qloop_76*wy_dof_4 + tmp_qloop_77*wy_dof_5 + tmp_qloop_78*wy_dof_0; + const real_t tmp_qloop_86 = tmp_qloop_47 - 1.0; + const real_t tmp_qloop_87 = jac_affine_inv_0_0_BLUE*tmp_qloop_86; + const real_t tmp_qloop_88 = jac_affine_inv_0_1_BLUE*tmp_qloop_86; + const real_t tmp_qloop_93 = tmp_qloop_48 - 1.0; + const real_t tmp_qloop_94 = jac_affine_inv_1_0_BLUE*tmp_qloop_93; + const real_t tmp_qloop_95 = jac_affine_inv_1_1_BLUE*tmp_qloop_93; + const real_t tmp_qloop_104 = jac_affine_inv_1_0_BLUE*tmp_qloop_47; + const real_t tmp_qloop_105 = jac_affine_inv_0_0_BLUE*tmp_qloop_48; + const real_t tmp_qloop_106 = tmp_qloop_104 + tmp_qloop_105; + const real_t tmp_qloop_107 = jac_affine_inv_1_1_BLUE*tmp_qloop_47; + const real_t tmp_qloop_108 = jac_affine_inv_0_1_BLUE*tmp_qloop_48; + const real_t tmp_qloop_109 = tmp_qloop_107 + tmp_qloop_108; + const real_t tmp_qloop_119 = -tmp_qloop_47 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_120 = jac_affine_inv_1_0_BLUE*tmp_qloop_119 - tmp_qloop_105; + const real_t tmp_qloop_121 = jac_affine_inv_1_1_BLUE*tmp_qloop_119 - tmp_qloop_108; + const real_t tmp_qloop_129 = -tmp_qloop_48 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_130 = jac_affine_inv_0_0_BLUE*tmp_qloop_129 - tmp_qloop_104; + const real_t tmp_qloop_131 = jac_affine_inv_0_1_BLUE*tmp_qloop_129 - tmp_qloop_107; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - 
jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_81 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(diffusivity_times_delta_dof_0*tmp_qloop_78 + diffusivity_times_delta_dof_1*tmp_qloop_72 + diffusivity_times_delta_dof_2*tmp_qloop_75 + diffusivity_times_delta_dof_3*tmp_qloop_69 + diffusivity_times_delta_dof_4*tmp_qloop_76 + diffusivity_times_delta_dof_5*tmp_qloop_77)*_data_q_w[q]; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t tmp_qloop_82 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_50 + jac_blending_inv_1_0*tmp_qloop_54) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_50 + jac_blending_inv_1_1*tmp_qloop_54)); + const real_t tmp_qloop_133 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_87 + jac_blending_inv_1_0*tmp_qloop_88) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_87 + jac_blending_inv_1_1*tmp_qloop_88)); + const real_t tmp_qloop_134 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_94 + jac_blending_inv_1_0*tmp_qloop_95) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_94 + jac_blending_inv_1_1*tmp_qloop_95)); + const real_t tmp_qloop_135 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_106 + jac_blending_inv_1_0*tmp_qloop_109) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_106 + jac_blending_inv_1_1*tmp_qloop_109)); + const real_t tmp_qloop_136 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_120 + jac_blending_inv_1_0*tmp_qloop_121) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_120 + jac_blending_inv_1_1*tmp_qloop_121)); + const real_t tmp_qloop_137 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_130 + 
jac_blending_inv_1_0*tmp_qloop_131) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_130 + jac_blending_inv_1_1*tmp_qloop_131)); + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t tmp_qloop_51 = -hessian_blending_0_0_0*jac_blending_inv_0_0 - hessian_blending_0_1_0*jac_blending_inv_1_0; + const real_t tmp_qloop_60 = -hessian_blending_0_0_0*jac_blending_inv_0_1 - hessian_blending_0_1_0*jac_blending_inv_1_1; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t tmp_qloop_56 = -hessian_blending_1_0_0*jac_blending_inv_0_0 - hessian_blending_1_1_0*jac_blending_inv_1_0; + const real_t tmp_qloop_64 = -hessian_blending_1_0_0*jac_blending_inv_0_1 - hessian_blending_1_1_0*jac_blending_inv_1_1; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t tmp_qloop_52 = -hessian_blending_0_0_1*jac_blending_inv_0_0 - hessian_blending_0_1_1*jac_blending_inv_1_0; + const real_t tmp_qloop_53 = jac_blending_inv_0_0*tmp_qloop_51 + jac_blending_inv_0_1*tmp_qloop_52; + const real_t tmp_qloop_55 = jac_blending_inv_1_0*tmp_qloop_51 + jac_blending_inv_1_1*tmp_qloop_52; + const real_t tmp_qloop_61 = -hessian_blending_0_0_1*jac_blending_inv_0_1 - hessian_blending_0_1_1*jac_blending_inv_1_1; + const real_t tmp_qloop_62 = 
jac_blending_inv_0_0*tmp_qloop_60 + jac_blending_inv_0_1*tmp_qloop_61; + const real_t tmp_qloop_63 = jac_blending_inv_1_0*tmp_qloop_60 + jac_blending_inv_1_1*tmp_qloop_61; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t tmp_qloop_57 = -hessian_blending_1_0_1*jac_blending_inv_0_0 - hessian_blending_1_1_1*jac_blending_inv_1_0; + const real_t tmp_qloop_58 = jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_0_1*tmp_qloop_57; + const real_t tmp_qloop_59 = jac_blending_inv_1_0*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57; + const real_t tmp_qloop_65 = -hessian_blending_1_0_1*jac_blending_inv_0_1 - hessian_blending_1_1_1*jac_blending_inv_1_1; + const real_t tmp_qloop_66 = jac_blending_inv_0_0*tmp_qloop_64 + jac_blending_inv_0_1*tmp_qloop_65; + const real_t tmp_qloop_67 = jac_blending_inv_1_0*tmp_qloop_64 + jac_blending_inv_1_1*tmp_qloop_65; + const real_t tmp_qloop_68 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_3 + jac_blending_inv_1_0*tmp_moved_constant_7) + jac_blending_inv_0_0*(tmp_qloop_50*tmp_qloop_53 + tmp_qloop_54*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_3 + jac_blending_inv_1_1*tmp_moved_constant_7) + jac_blending_inv_0_1*(tmp_qloop_50*tmp_qloop_58 + tmp_qloop_54*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_8 + jac_blending_inv_1_0*tmp_moved_constant_9) + jac_blending_inv_1_0*(tmp_qloop_50*tmp_qloop_62 + tmp_qloop_54*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_8 + jac_blending_inv_1_1*tmp_moved_constant_9) + jac_blending_inv_1_1*(tmp_qloop_50*tmp_qloop_66 + tmp_qloop_54*tmp_qloop_67); + const real_t tmp_qloop_89 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_11 + jac_blending_inv_1_0*tmp_moved_constant_10) + 
jac_blending_inv_0_0*(tmp_qloop_53*tmp_qloop_87 + tmp_qloop_55*tmp_qloop_88) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_11 + jac_blending_inv_1_1*tmp_moved_constant_10) + jac_blending_inv_0_1*(tmp_qloop_58*tmp_qloop_87 + tmp_qloop_59*tmp_qloop_88) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_10 + jac_blending_inv_1_0*tmp_moved_constant_12) + jac_blending_inv_1_0*(tmp_qloop_62*tmp_qloop_87 + tmp_qloop_63*tmp_qloop_88) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_10 + jac_blending_inv_1_1*tmp_moved_constant_12) + jac_blending_inv_1_1*(tmp_qloop_66*tmp_qloop_87 + tmp_qloop_67*tmp_qloop_88); + const real_t tmp_qloop_96 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_14 + jac_blending_inv_1_0*tmp_moved_constant_13) + jac_blending_inv_0_0*(tmp_qloop_53*tmp_qloop_94 + tmp_qloop_55*tmp_qloop_95) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_14 + jac_blending_inv_1_1*tmp_moved_constant_13) + jac_blending_inv_0_1*(tmp_qloop_58*tmp_qloop_94 + tmp_qloop_59*tmp_qloop_95) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_13 + jac_blending_inv_1_0*tmp_moved_constant_15) + jac_blending_inv_1_0*(tmp_qloop_62*tmp_qloop_94 + tmp_qloop_63*tmp_qloop_95) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_13 + jac_blending_inv_1_1*tmp_moved_constant_15) + jac_blending_inv_1_1*(tmp_qloop_66*tmp_qloop_94 + tmp_qloop_67*tmp_qloop_95); + const real_t tmp_qloop_110 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_17 + jac_blending_inv_1_0*tmp_moved_constant_20) + jac_blending_inv_0_0*(tmp_qloop_106*tmp_qloop_53 + tmp_qloop_109*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_17 + jac_blending_inv_1_1*tmp_moved_constant_20) + jac_blending_inv_0_1*(tmp_qloop_106*tmp_qloop_58 + tmp_qloop_109*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_20 + jac_blending_inv_1_0*tmp_moved_constant_22) + 
jac_blending_inv_1_0*(tmp_qloop_106*tmp_qloop_62 + tmp_qloop_109*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_20 + jac_blending_inv_1_1*tmp_moved_constant_22) + jac_blending_inv_1_1*(tmp_qloop_106*tmp_qloop_66 + tmp_qloop_109*tmp_qloop_67); + const real_t tmp_qloop_122 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_25 + jac_blending_inv_1_0*tmp_moved_constant_27) + jac_blending_inv_0_0*(tmp_qloop_120*tmp_qloop_53 + tmp_qloop_121*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_25 + jac_blending_inv_1_1*tmp_moved_constant_27) + jac_blending_inv_0_1*(tmp_qloop_120*tmp_qloop_58 + tmp_qloop_121*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_28 + jac_blending_inv_1_0*tmp_moved_constant_30) + jac_blending_inv_1_0*(tmp_qloop_120*tmp_qloop_62 + tmp_qloop_121*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_28 + jac_blending_inv_1_1*tmp_moved_constant_30) + jac_blending_inv_1_1*(tmp_qloop_120*tmp_qloop_66 + tmp_qloop_121*tmp_qloop_67); + const real_t tmp_qloop_132 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_32 + jac_blending_inv_1_0*tmp_moved_constant_34) + jac_blending_inv_0_0*(tmp_qloop_130*tmp_qloop_53 + tmp_qloop_131*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_32 + jac_blending_inv_1_1*tmp_moved_constant_34) + jac_blending_inv_0_1*(tmp_qloop_130*tmp_qloop_58 + tmp_qloop_131*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_35 + jac_blending_inv_1_0*tmp_moved_constant_36) + jac_blending_inv_1_0*(tmp_qloop_130*tmp_qloop_62 + tmp_qloop_131*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_35 + jac_blending_inv_1_1*tmp_moved_constant_36) + jac_blending_inv_1_1*(tmp_qloop_130*tmp_qloop_66 + tmp_qloop_131*tmp_qloop_67); + const real_t q_tmp_0_0 = tmp_qloop_68*tmp_qloop_82; + const real_t q_tmp_0_1 = tmp_qloop_82*tmp_qloop_89; + 
const real_t q_tmp_0_2 = tmp_qloop_82*tmp_qloop_96; + const real_t q_tmp_0_3 = tmp_qloop_110*tmp_qloop_82; + const real_t q_tmp_0_4 = tmp_qloop_122*tmp_qloop_82; + const real_t q_tmp_0_5 = tmp_qloop_132*tmp_qloop_82; + const real_t q_tmp_1_0 = tmp_qloop_133*tmp_qloop_68; + const real_t q_tmp_1_1 = tmp_qloop_133*tmp_qloop_89; + const real_t q_tmp_1_2 = tmp_qloop_133*tmp_qloop_96; + const real_t q_tmp_1_3 = tmp_qloop_110*tmp_qloop_133; + const real_t q_tmp_1_4 = tmp_qloop_122*tmp_qloop_133; + const real_t q_tmp_1_5 = tmp_qloop_132*tmp_qloop_133; + const real_t q_tmp_2_0 = tmp_qloop_134*tmp_qloop_68; + const real_t q_tmp_2_1 = tmp_qloop_134*tmp_qloop_89; + const real_t q_tmp_2_2 = tmp_qloop_134*tmp_qloop_96; + const real_t q_tmp_2_3 = tmp_qloop_110*tmp_qloop_134; + const real_t q_tmp_2_4 = tmp_qloop_122*tmp_qloop_134; + const real_t q_tmp_2_5 = tmp_qloop_132*tmp_qloop_134; + const real_t q_tmp_3_0 = tmp_qloop_135*tmp_qloop_68; + const real_t q_tmp_3_1 = tmp_qloop_135*tmp_qloop_89; + const real_t q_tmp_3_2 = tmp_qloop_135*tmp_qloop_96; + const real_t q_tmp_3_3 = tmp_qloop_110*tmp_qloop_135; + const real_t q_tmp_3_4 = tmp_qloop_122*tmp_qloop_135; + const real_t q_tmp_3_5 = tmp_qloop_132*tmp_qloop_135; + const real_t q_tmp_4_0 = tmp_qloop_136*tmp_qloop_68; + const real_t q_tmp_4_1 = tmp_qloop_136*tmp_qloop_89; + const real_t q_tmp_4_2 = tmp_qloop_136*tmp_qloop_96; + const real_t q_tmp_4_3 = tmp_qloop_110*tmp_qloop_136; + const real_t q_tmp_4_4 = tmp_qloop_122*tmp_qloop_136; + const real_t q_tmp_4_5 = tmp_qloop_132*tmp_qloop_136; + const real_t q_tmp_5_0 = tmp_qloop_137*tmp_qloop_68; + const real_t q_tmp_5_1 = tmp_qloop_137*tmp_qloop_89; + const real_t q_tmp_5_2 = tmp_qloop_137*tmp_qloop_96; + const real_t q_tmp_5_3 = tmp_qloop_110*tmp_qloop_137; + const real_t q_tmp_5_4 = tmp_qloop_122*tmp_qloop_137; + const real_t q_tmp_5_5 = tmp_qloop_132*tmp_qloop_137; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + 
q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5; + const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + 
q_acc_4_5*src_dof_5; + const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + } + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git 
a/operators/supg_diffusion/avx/P2ElementwiseSupgDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp b/operators/supg_diffusion/avx/P2ElementwiseSupgDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e678660437ec9b75877f6770b0b7687a0aa7f532 --- /dev/null +++ b/operators/supg_diffusion/avx/P2ElementwiseSupgDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp @@ -0,0 +1,899 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. 
+*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseSupgDiffusionAnnulusMap.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseSupgDiffusionAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D( real_t * RESTRICT _data_diffusivity_times_deltaEdge, real_t * RESTRICT _data_diffusivity_times_deltaVertex, real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_wxEdge, real_t * RESTRICT _data_wxVertex, real_t * RESTRICT _data_wyEdge, real_t * RESTRICT _data_wyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + 
macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; + const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_8 = -tmp_qloop_7; + const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); + const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; + const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_11*1.0; + const real_t tmp_qloop_54 = jac_affine_inv_0_0_GRAY*4.0; + const real_t tmp_qloop_55 = jac_affine_inv_1_0_GRAY*4.0; + const real_t tmp_qloop_56 = tmp_qloop_54 + tmp_qloop_55; + const real_t tmp_qloop_57 = jac_affine_inv_0_0_GRAY*tmp_qloop_56 + 
jac_affine_inv_1_0_GRAY*tmp_qloop_56; + const real_t tmp_qloop_58 = jac_affine_inv_0_1_GRAY*4.0; + const real_t tmp_qloop_59 = jac_affine_inv_1_1_GRAY*4.0; + const real_t tmp_qloop_60 = tmp_qloop_58 + tmp_qloop_59; + const real_t tmp_qloop_61 = jac_affine_inv_0_0_GRAY*tmp_qloop_60 + jac_affine_inv_1_0_GRAY*tmp_qloop_60; + const real_t tmp_qloop_62 = jac_affine_inv_0_1_GRAY*tmp_qloop_56 + jac_affine_inv_1_1_GRAY*tmp_qloop_56; + const real_t tmp_qloop_63 = jac_affine_inv_0_1_GRAY*tmp_qloop_60 + jac_affine_inv_1_1_GRAY*tmp_qloop_60; + const real_t tmp_qloop_84 = jac_affine_inv_0_1_GRAY*tmp_qloop_54; + const real_t tmp_qloop_85 = (jac_affine_inv_0_0_GRAY*jac_affine_inv_0_0_GRAY)*4.0; + const real_t tmp_qloop_86 = (jac_affine_inv_0_1_GRAY*jac_affine_inv_0_1_GRAY)*4.0; + const real_t tmp_qloop_90 = jac_affine_inv_1_1_GRAY*tmp_qloop_55; + const real_t tmp_qloop_91 = (jac_affine_inv_1_0_GRAY*jac_affine_inv_1_0_GRAY)*4.0; + const real_t tmp_qloop_92 = (jac_affine_inv_1_1_GRAY*jac_affine_inv_1_1_GRAY)*4.0; + const real_t tmp_qloop_99 = jac_affine_inv_1_0_GRAY*8.0; + const real_t tmp_qloop_100 = jac_affine_inv_0_0_GRAY*tmp_qloop_99; + const real_t tmp_qloop_101 = jac_affine_inv_1_1_GRAY*tmp_qloop_54; + const real_t tmp_qloop_102 = jac_affine_inv_0_1_GRAY*tmp_qloop_55; + const real_t tmp_qloop_103 = tmp_qloop_101 + tmp_qloop_102; + const real_t tmp_qloop_104 = jac_affine_inv_1_1_GRAY*8.0; + const real_t tmp_qloop_105 = jac_affine_inv_0_1_GRAY*tmp_qloop_104; + const real_t tmp_qloop_109 = jac_affine_inv_1_0_GRAY*tmp_qloop_54; + const real_t tmp_qloop_110 = -tmp_qloop_54 - tmp_qloop_99; + const real_t tmp_qloop_111 = jac_affine_inv_1_0_GRAY*tmp_qloop_110 - tmp_qloop_109; + const real_t tmp_qloop_112 = -tmp_qloop_104 - tmp_qloop_58; + const real_t tmp_qloop_113 = jac_affine_inv_1_0_GRAY*tmp_qloop_112 - tmp_qloop_101; + const real_t tmp_qloop_114 = jac_affine_inv_1_1_GRAY*tmp_qloop_110 - tmp_qloop_102; + const real_t tmp_qloop_115 = jac_affine_inv_1_1_GRAY*tmp_qloop_58; + const 
real_t tmp_qloop_116 = jac_affine_inv_1_1_GRAY*tmp_qloop_112 - tmp_qloop_115; + const real_t tmp_qloop_120 = jac_affine_inv_0_0_GRAY*-8.0 - tmp_qloop_55; + const real_t tmp_qloop_121 = jac_affine_inv_0_0_GRAY*tmp_qloop_120 - tmp_qloop_109; + const real_t tmp_qloop_122 = jac_affine_inv_0_1_GRAY*-8.0 - tmp_qloop_59; + const real_t tmp_qloop_123 = jac_affine_inv_0_0_GRAY*tmp_qloop_122 - tmp_qloop_102; + const real_t tmp_qloop_124 = jac_affine_inv_0_1_GRAY*tmp_qloop_120 - tmp_qloop_101; + const real_t tmp_qloop_125 = jac_affine_inv_0_1_GRAY*tmp_qloop_122 - tmp_qloop_115; + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& 
_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d diffusivity_times_delta_dof_0 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d diffusivity_times_delta_dof_1 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d diffusivity_times_delta_dof_2 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d diffusivity_times_delta_dof_3 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d diffusivity_times_delta_dof_4 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d diffusivity_times_delta_dof_5 = 
_mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d wx_dof_0 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d wx_dof_1 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d wx_dof_2 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d wx_dof_3 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d wx_dof_4 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d wx_dof_5 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d wy_dof_0 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d wy_dof_1 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d wy_dof_2 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d wy_dof_3 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d wy_dof_4 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const 
__m256d wy_dof_5 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0); + const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0); + const __m256d tmp_qloop_3 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1); + const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3); + const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4); + const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5)); + const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_15 = 
_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5)); + const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)); + const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18); + const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24); + const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4)); + const __m256d tmp_qloop_29 = 
_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3); + const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30); + const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3); + const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3)); + const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_38 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_39 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_37),tmp_qloop_38); + const __m256d tmp_qloop_40 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY))); + const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))); + const __m256d tmp_qloop_42 = 
_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_43 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_44); + const __m256d tmp_qloop_46 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_47); + const __m256d tmp_qloop_49 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38); + const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_37); + const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_42),tmp_qloop_44),tmp_qloop_47); + const __m256d tmp_qloop_52 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,wx_dof_3),_mm256_mul_pd(tmp_qloop_45,wx_dof_1)),_mm256_mul_pd(tmp_qloop_48,wx_dof_2)),_mm256_mul_pd(tmp_qloop_49,wx_dof_4)),_mm256_mul_pd(tmp_qloop_50,wx_dof_5)),_mm256_mul_pd(tmp_qloop_51,wx_dof_0)); + const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,wy_dof_3),_mm256_mul_pd(tmp_qloop_45,wy_dof_1)),_mm256_mul_pd(tmp_qloop_48,wy_dof_2)),_mm256_mul_pd(tmp_qloop_49,wy_dof_4)),_mm256_mul_pd(tmp_qloop_50,wy_dof_5)),_mm256_mul_pd(tmp_qloop_51,wy_dof_0)); + const __m256d tmp_qloop_81 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_37); + const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_81,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)); + const __m256d tmp_qloop_83 = _mm256_mul_pd(tmp_qloop_81,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)); + const __m256d tmp_qloop_87 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_38); + const __m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_87,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)); + const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_87,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)); + const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)); + const __m256d tmp_qloop_94 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)); + const __m256d tmp_qloop_95 = _mm256_add_pd(tmp_qloop_93,tmp_qloop_94); + const __m256d tmp_qloop_96 = 
_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)); + const __m256d tmp_qloop_97 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)); + const __m256d tmp_qloop_98 = _mm256_add_pd(tmp_qloop_96,tmp_qloop_97); + const __m256d tmp_qloop_106 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_107 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_94,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_106,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY))); + const __m256d tmp_qloop_108 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_97,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_106,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))); + const __m256d tmp_qloop_117 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_118 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_117,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY))); + const __m256d tmp_qloop_119 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_96,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_117,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))); + const __m256d jac_blending_0_0 = 
_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4)); + const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21); + const __m256d abs_det_jac_blending = tmp_qloop_21; + const __m256d tmp_qloop_80 = _mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(diffusivity_times_delta_dof_0,tmp_qloop_51),_mm256_mul_pd(diffusivity_times_delta_dof_1,tmp_qloop_45)),_mm256_mul_pd(diffusivity_times_delta_dof_2,tmp_qloop_48)),_mm256_mul_pd(diffusivity_times_delta_dof_3,tmp_qloop_42)),_mm256_mul_pd(diffusivity_times_delta_dof_4,tmp_qloop_49)),_mm256_mul_pd(diffusivity_times_delta_dof_5,tmp_qloop_50))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22); + const __m256d jac_blending_inv_0_1 = 
_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28); + const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d hessian_blending_0_0_1 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11))); + const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_0_1_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_0,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_0,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_72 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_0,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_0,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36); + const __m256d tmp_qloop_68 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_0,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_0,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_76 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_0,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_0,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d hessian_blending_0_1_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36); + const __m256d tmp_qloop_65 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_1,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_1,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_66 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_64),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_65)); + const __m256d tmp_qloop_67 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_64),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_65)); + const __m256d tmp_qloop_73 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_1,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_1,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_72),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_73)); + const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_72),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_73)); + const __m256d hessian_blending_1_1_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d tmp_qloop_69 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_1,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_1,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_70 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_69)); + const __m256d tmp_qloop_71 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_69)); + const __m256d tmp_qloop_77 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_1,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_1,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_78 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_76),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_77)); + const __m256d tmp_qloop_79 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_76),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_77)); + const __m256d q_tmp_0_0 = 
_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_41))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_41))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_57,tmp_qloop_57,tmp_qloop_57,tmp_qloop_57)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_61,tmp_qloop_61,tmp_qloop_61,tmp_qloop_61)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_57,tmp_qloop_57,tmp_qloop_57,tmp_qloop_57)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_61,tmp_qloop_61,tmp_qloop_61,tmp_qloop_61))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_62,tmp_qloop_62,tmp_qloop_62,tmp_qloop_62)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_63,tmp_qloop_63,tmp_qloop_63,tmp_qloop_63))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_62,tmp_qloop_62,tmp_qloop_62,tmp_qloop_62)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_63,tmp_qloop_63,tmp_qloop_63,tmp_qloop_63))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_41,tmp_qloop_67)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,tmp_qloop_70),_mm256_mul_pd(tmp_qloop_41,tmp_qloop_71)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,tmp_qloop_74),_mm256_mul_pd(tmp_qloop_41,tmp_qloop_75)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,tmp_qloop_78),_mm256_mul_pd(tmp_qloop_41,tmp_qloop_79)
)))); + const __m256d q_tmp_1_1 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_82),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_83))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_82),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_83))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_84,tmp_qloop_84,tmp_qloop_84,tmp_qloop_84)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_86,tmp_qloop_86,tmp_qloop_86,tmp_qloop_86)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_84,tmp_qloop_84,tmp_qloop_84,tmp_qloop_84)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_86,tmp_qloop_86,tmp_qloop_86,tmp_qloop_86))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_84,tmp_qloop_84,tmp_qloop_84,tmp_qloop_84)),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_85,tmp_qloop_85,tmp_qloop_85,tmp_qloop_85))))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_84,tmp_qloop_84,tmp_qloop_84,tmp_qloop_84)),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_85,tmp_qloop_85,tmp_qloop_85,tmp_qloop_85))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,tmp_qloop_82),_mm256_mul_pd(tmp_qloop_67,tmp_qloop_83)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_70,tmp_qloop_82),_mm256_mul_pd(tmp_qloop_71,tmp_qloop_83)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_74,tmp_qloop_82),_mm256_mul_pd(tmp_qloop_75,tmp_qloop_83)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_78,tmp_qloop_82),_mm256
_mul_pd(tmp_qloop_79,tmp_qloop_83))))); + const __m256d q_tmp_2_2 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_88),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_89))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_88),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_89))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_90,tmp_qloop_90,tmp_qloop_90,tmp_qloop_90)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_92,tmp_qloop_92,tmp_qloop_92,tmp_qloop_92)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_90,tmp_qloop_90,tmp_qloop_90,tmp_qloop_90)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_92,tmp_qloop_92,tmp_qloop_92,tmp_qloop_92))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_90,tmp_qloop_90,tmp_qloop_90,tmp_qloop_90)),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_91,tmp_qloop_91,tmp_qloop_91,tmp_qloop_91))))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_90,tmp_qloop_90,tmp_qloop_90,tmp_qloop_90)),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_91,tmp_qloop_91,tmp_qloop_91,tmp_qloop_91))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,tmp_qloop_88),_mm256_mul_pd(tmp_qloop_67,tmp_qloop_89)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_70,tmp_qloop_88),_mm256_mul_pd(tmp_qloop_71,tmp_qloop_89)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_74,tmp_qloop_88),_mm256_mul_pd(tmp_qloop_75,tmp_qloop_89)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd
(tmp_qloop_78,tmp_qloop_88),_mm256_mul_pd(tmp_qloop_79,tmp_qloop_89))))); + const __m256d q_tmp_3_3 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_95),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_98))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_95),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_98))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_100,tmp_qloop_100,tmp_qloop_100,tmp_qloop_100)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_103,tmp_qloop_103,tmp_qloop_103,tmp_qloop_103)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_100,tmp_qloop_100,tmp_qloop_100,tmp_qloop_100)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_103,tmp_qloop_103,tmp_qloop_103,tmp_qloop_103))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_103,tmp_qloop_103,tmp_qloop_103,tmp_qloop_103)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_105,tmp_qloop_105,tmp_qloop_105,tmp_qloop_105))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_103,tmp_qloop_103,tmp_qloop_103,tmp_qloop_103)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_105,tmp_qloop_105,tmp_qloop_105,tmp_qloop_105))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,tmp_qloop_95),_mm256_mul_pd(tmp_qloop_67,tmp_qloop_98)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_70,tmp_qloop_95),_mm256_mul_pd(tmp_qloop_71,tmp_qloop_98)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_74,tmp_qloop_95),_mm256_mul_pd(tmp_qloop_75,tmp_qloop_98)
))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_78,tmp_qloop_95),_mm256_mul_pd(tmp_qloop_79,tmp_qloop_98))))); + const __m256d q_tmp_4_4 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_107),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_108))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_107),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_108))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_111,tmp_qloop_111,tmp_qloop_111,tmp_qloop_111)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_113,tmp_qloop_113,tmp_qloop_113,tmp_qloop_113)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_111,tmp_qloop_111,tmp_qloop_111,tmp_qloop_111)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_113,tmp_qloop_113,tmp_qloop_113,tmp_qloop_113))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_114,tmp_qloop_114,tmp_qloop_114,tmp_qloop_114)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_116,tmp_qloop_116,tmp_qloop_116,tmp_qloop_116))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_114,tmp_qloop_114,tmp_qloop_114,tmp_qloop_114)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_116,tmp_qloop_116,tmp_qloop_116,tmp_qloop_116))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_108,tmp_qloop_67)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_70),_mm256_mul_pd(tmp_qloop_108,tmp_qloop_71)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_
mul_pd(tmp_qloop_107,tmp_qloop_74),_mm256_mul_pd(tmp_qloop_108,tmp_qloop_75)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_78),_mm256_mul_pd(tmp_qloop_108,tmp_qloop_79))))); + const __m256d q_tmp_5_5 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_118),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_119))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_118),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_119))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_121,tmp_qloop_121,tmp_qloop_121,tmp_qloop_121)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_123,tmp_qloop_123,tmp_qloop_123,tmp_qloop_123)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_121,tmp_qloop_121,tmp_qloop_121,tmp_qloop_121)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_123,tmp_qloop_123,tmp_qloop_123,tmp_qloop_123))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_qloop_124,tmp_qloop_124,tmp_qloop_124,tmp_qloop_124)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_qloop_125,tmp_qloop_125,tmp_qloop_125,tmp_qloop_125))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_qloop_124,tmp_qloop_124,tmp_qloop_124,tmp_qloop_124)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_qloop_125,tmp_qloop_125,tmp_qloop_125,tmp_qloop_125))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_118,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_119,tmp_qloop_67)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_118,tmp_qloop_70),_mm256_mul_pd(tmp_qloop
_119,tmp_qloop_71)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_118,tmp_qloop_74),_mm256_mul_pd(tmp_qloop_119,tmp_qloop_75)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_118,tmp_qloop_78),_mm256_mul_pd(tmp_qloop_119,tmp_qloop_79))))); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4); + q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5); + } + const __m256d elMatDiag_0 = q_acc_0_0; + const __m256d elMatDiag_1 = q_acc_1_1; + const __m256d elMatDiag_2 = q_acc_2_2; + const __m256d elMatDiag_3 = q_acc_3_3; + const __m256d elMatDiag_4 = q_acc_4_4; + const __m256d elMatDiag_5 = q_acc_5_5; + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / 
(2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / 
(micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t diffusivity_times_delta_dof_0 = 
_data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wx_dof_1 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_3 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wy_dof_0 = 
_data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wy_dof_1 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_3 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + 
const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0; + const real_t tmp_qloop_40 = jac_affine_inv_0_0_GRAY*tmp_qloop_39 + jac_affine_inv_1_0_GRAY*tmp_qloop_39; + const real_t tmp_qloop_41 = jac_affine_inv_0_1_GRAY*tmp_qloop_39 + jac_affine_inv_1_1_GRAY*tmp_qloop_39; + const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_44 = tmp_qloop_43*2.0; + const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q]; + const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_47 = tmp_qloop_46*2.0; + const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q]; + const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0; + const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0; + const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 
3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_52 = tmp_qloop_42*wx_dof_3 + tmp_qloop_45*wx_dof_1 + tmp_qloop_48*wx_dof_2 + tmp_qloop_49*wx_dof_4 + tmp_qloop_50*wx_dof_5 + tmp_qloop_51*wx_dof_0; + const real_t tmp_qloop_53 = tmp_qloop_42*wy_dof_3 + tmp_qloop_45*wy_dof_1 + tmp_qloop_48*wy_dof_2 + tmp_qloop_49*wy_dof_4 + tmp_qloop_50*wy_dof_5 + tmp_qloop_51*wy_dof_0; + const real_t tmp_qloop_81 = tmp_qloop_37 - 1.0; + const real_t tmp_qloop_82 = jac_affine_inv_0_0_GRAY*tmp_qloop_81; + const real_t tmp_qloop_83 = jac_affine_inv_0_1_GRAY*tmp_qloop_81; + const real_t tmp_qloop_87 = tmp_qloop_38 - 1.0; + const real_t tmp_qloop_88 = jac_affine_inv_1_0_GRAY*tmp_qloop_87; + const real_t tmp_qloop_89 = jac_affine_inv_1_1_GRAY*tmp_qloop_87; + const real_t tmp_qloop_93 = jac_affine_inv_1_0_GRAY*tmp_qloop_37; + const real_t tmp_qloop_94 = jac_affine_inv_0_0_GRAY*tmp_qloop_38; + const real_t tmp_qloop_95 = tmp_qloop_93 + tmp_qloop_94; + const real_t tmp_qloop_96 = jac_affine_inv_1_1_GRAY*tmp_qloop_37; + const real_t tmp_qloop_97 = jac_affine_inv_0_1_GRAY*tmp_qloop_38; + const real_t tmp_qloop_98 = tmp_qloop_96 + tmp_qloop_97; + const real_t tmp_qloop_106 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_107 = jac_affine_inv_1_0_GRAY*tmp_qloop_106 - tmp_qloop_94; + const real_t tmp_qloop_108 = jac_affine_inv_1_1_GRAY*tmp_qloop_106 - tmp_qloop_97; + const real_t tmp_qloop_117 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_118 = jac_affine_inv_0_0_GRAY*tmp_qloop_117 - tmp_qloop_93; + const real_t tmp_qloop_119 = jac_affine_inv_0_1_GRAY*tmp_qloop_117 - tmp_qloop_96; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = 
tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_80 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(diffusivity_times_delta_dof_0*tmp_qloop_51 + diffusivity_times_delta_dof_1*tmp_qloop_45 + diffusivity_times_delta_dof_2*tmp_qloop_48 + diffusivity_times_delta_dof_3*tmp_qloop_42 + diffusivity_times_delta_dof_4*tmp_qloop_49 + diffusivity_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q]; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t tmp_qloop_64 = -hessian_blending_0_0_0*jac_blending_inv_0_0 - hessian_blending_0_1_0*jac_blending_inv_1_0; + const real_t tmp_qloop_72 = -hessian_blending_0_0_0*jac_blending_inv_0_1 - hessian_blending_0_1_0*jac_blending_inv_1_1; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t tmp_qloop_68 = -hessian_blending_1_0_0*jac_blending_inv_0_0 - 
hessian_blending_1_1_0*jac_blending_inv_1_0; + const real_t tmp_qloop_76 = -hessian_blending_1_0_0*jac_blending_inv_0_1 - hessian_blending_1_1_0*jac_blending_inv_1_1; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t tmp_qloop_65 = -hessian_blending_0_0_1*jac_blending_inv_0_0 - hessian_blending_0_1_1*jac_blending_inv_1_0; + const real_t tmp_qloop_66 = jac_blending_inv_0_0*tmp_qloop_64 + jac_blending_inv_0_1*tmp_qloop_65; + const real_t tmp_qloop_67 = jac_blending_inv_1_0*tmp_qloop_64 + jac_blending_inv_1_1*tmp_qloop_65; + const real_t tmp_qloop_73 = -hessian_blending_0_0_1*jac_blending_inv_0_1 - hessian_blending_0_1_1*jac_blending_inv_1_1; + const real_t tmp_qloop_74 = jac_blending_inv_0_0*tmp_qloop_72 + jac_blending_inv_0_1*tmp_qloop_73; + const real_t tmp_qloop_75 = jac_blending_inv_1_0*tmp_qloop_72 + jac_blending_inv_1_1*tmp_qloop_73; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t tmp_qloop_69 = -hessian_blending_1_0_1*jac_blending_inv_0_0 - hessian_blending_1_1_1*jac_blending_inv_1_0; + const real_t tmp_qloop_70 = jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_0_1*tmp_qloop_69; + const real_t tmp_qloop_71 = jac_blending_inv_1_0*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69; + const real_t tmp_qloop_77 = -hessian_blending_1_0_1*jac_blending_inv_0_1 - hessian_blending_1_1_1*jac_blending_inv_1_1; + const real_t tmp_qloop_78 = jac_blending_inv_0_0*tmp_qloop_76 + jac_blending_inv_0_1*tmp_qloop_77; + const real_t tmp_qloop_79 = jac_blending_inv_1_0*tmp_qloop_76 + jac_blending_inv_1_1*tmp_qloop_77; + const real_t q_tmp_0_0 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + 
jac_blending_inv_1_1*tmp_qloop_41))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_57 + jac_blending_inv_1_0*tmp_qloop_61) + jac_blending_inv_0_0*(tmp_qloop_40*tmp_qloop_66 + tmp_qloop_41*tmp_qloop_67) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_57 + jac_blending_inv_1_1*tmp_qloop_61) + jac_blending_inv_0_1*(tmp_qloop_40*tmp_qloop_70 + tmp_qloop_41*tmp_qloop_71) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_62 + jac_blending_inv_1_0*tmp_qloop_63) + jac_blending_inv_1_0*(tmp_qloop_40*tmp_qloop_74 + tmp_qloop_41*tmp_qloop_75) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_62 + jac_blending_inv_1_1*tmp_qloop_63) + jac_blending_inv_1_1*(tmp_qloop_40*tmp_qloop_78 + tmp_qloop_41*tmp_qloop_79)); + const real_t q_tmp_1_1 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_82 + jac_blending_inv_1_0*tmp_qloop_83) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_82 + jac_blending_inv_1_1*tmp_qloop_83))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_85 + jac_blending_inv_1_0*tmp_qloop_84) + jac_blending_inv_0_0*(tmp_qloop_66*tmp_qloop_82 + tmp_qloop_67*tmp_qloop_83) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_85 + jac_blending_inv_1_1*tmp_qloop_84) + jac_blending_inv_0_1*(tmp_qloop_70*tmp_qloop_82 + tmp_qloop_71*tmp_qloop_83) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_84 + jac_blending_inv_1_0*tmp_qloop_86) + jac_blending_inv_1_0*(tmp_qloop_74*tmp_qloop_82 + tmp_qloop_75*tmp_qloop_83) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_84 + jac_blending_inv_1_1*tmp_qloop_86) + jac_blending_inv_1_1*(tmp_qloop_78*tmp_qloop_82 + tmp_qloop_79*tmp_qloop_83)); + const real_t q_tmp_2_2 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_88 + jac_blending_inv_1_0*tmp_qloop_89) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_88 + jac_blending_inv_1_1*tmp_qloop_89))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_91 + jac_blending_inv_1_0*tmp_qloop_90) + 
jac_blending_inv_0_0*(tmp_qloop_66*tmp_qloop_88 + tmp_qloop_67*tmp_qloop_89) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_91 + jac_blending_inv_1_1*tmp_qloop_90) + jac_blending_inv_0_1*(tmp_qloop_70*tmp_qloop_88 + tmp_qloop_71*tmp_qloop_89) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_90 + jac_blending_inv_1_0*tmp_qloop_92) + jac_blending_inv_1_0*(tmp_qloop_74*tmp_qloop_88 + tmp_qloop_75*tmp_qloop_89) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_90 + jac_blending_inv_1_1*tmp_qloop_92) + jac_blending_inv_1_1*(tmp_qloop_78*tmp_qloop_88 + tmp_qloop_79*tmp_qloop_89)); + const real_t q_tmp_3_3 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_95 + jac_blending_inv_1_0*tmp_qloop_98) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_95 + jac_blending_inv_1_1*tmp_qloop_98))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_100 + jac_blending_inv_1_0*tmp_qloop_103) + jac_blending_inv_0_0*(tmp_qloop_66*tmp_qloop_95 + tmp_qloop_67*tmp_qloop_98) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_100 + jac_blending_inv_1_1*tmp_qloop_103) + jac_blending_inv_0_1*(tmp_qloop_70*tmp_qloop_95 + tmp_qloop_71*tmp_qloop_98) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_103 + jac_blending_inv_1_0*tmp_qloop_105) + jac_blending_inv_1_0*(tmp_qloop_74*tmp_qloop_95 + tmp_qloop_75*tmp_qloop_98) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_103 + jac_blending_inv_1_1*tmp_qloop_105) + jac_blending_inv_1_1*(tmp_qloop_78*tmp_qloop_95 + tmp_qloop_79*tmp_qloop_98)); + const real_t q_tmp_4_4 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_107 + jac_blending_inv_1_0*tmp_qloop_108) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_107 + jac_blending_inv_1_1*tmp_qloop_108))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_111 + jac_blending_inv_1_0*tmp_qloop_113) + jac_blending_inv_0_0*(tmp_qloop_107*tmp_qloop_66 + tmp_qloop_108*tmp_qloop_67) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_111 + 
jac_blending_inv_1_1*tmp_qloop_113) + jac_blending_inv_0_1*(tmp_qloop_107*tmp_qloop_70 + tmp_qloop_108*tmp_qloop_71) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_114 + jac_blending_inv_1_0*tmp_qloop_116) + jac_blending_inv_1_0*(tmp_qloop_107*tmp_qloop_74 + tmp_qloop_108*tmp_qloop_75) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_114 + jac_blending_inv_1_1*tmp_qloop_116) + jac_blending_inv_1_1*(tmp_qloop_107*tmp_qloop_78 + tmp_qloop_108*tmp_qloop_79)); + const real_t q_tmp_5_5 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_118 + jac_blending_inv_1_0*tmp_qloop_119) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_118 + jac_blending_inv_1_1*tmp_qloop_119))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_121 + jac_blending_inv_1_0*tmp_qloop_123) + jac_blending_inv_0_0*(tmp_qloop_118*tmp_qloop_66 + tmp_qloop_119*tmp_qloop_67) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_121 + jac_blending_inv_1_1*tmp_qloop_123) + jac_blending_inv_0_1*(tmp_qloop_118*tmp_qloop_70 + tmp_qloop_119*tmp_qloop_71) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_124 + jac_blending_inv_1_0*tmp_qloop_125) + jac_blending_inv_1_0*(tmp_qloop_118*tmp_qloop_74 + tmp_qloop_119*tmp_qloop_75) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_124 + jac_blending_inv_1_1*tmp_qloop_125) + jac_blending_inv_1_1*(tmp_qloop_118*tmp_qloop_78 + tmp_qloop_119*tmp_qloop_79)); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatDiag_0 = q_acc_0_0; + const real_t elMatDiag_1 = q_acc_1_1; + const real_t elMatDiag_2 = q_acc_2_2; + const real_t elMatDiag_3 = q_acc_3_3; + const real_t elMatDiag_4 = q_acc_4_4; + const real_t elMatDiag_5 = q_acc_5_5; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + 
_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + } + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + 
macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + const real_t tmp_moved_constant_0 = jac_affine_inv_0_0_BLUE*4.0; + const real_t tmp_moved_constant_1 = jac_affine_inv_1_0_BLUE*4.0; + const real_t tmp_moved_constant_2 = tmp_moved_constant_0 + tmp_moved_constant_1; + const real_t tmp_moved_constant_3 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_2; + const real_t tmp_moved_constant_4 = 
jac_affine_inv_0_1_BLUE*4.0; + const real_t tmp_moved_constant_5 = jac_affine_inv_1_1_BLUE*4.0; + const real_t tmp_moved_constant_6 = tmp_moved_constant_4 + tmp_moved_constant_5; + const real_t tmp_moved_constant_7 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_6 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_6; + const real_t tmp_moved_constant_8 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_2; + const real_t tmp_moved_constant_9 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_6 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_6; + const real_t tmp_moved_constant_10 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_0; + const real_t tmp_moved_constant_11 = (jac_affine_inv_0_0_BLUE*jac_affine_inv_0_0_BLUE)*4.0; + const real_t tmp_moved_constant_12 = (jac_affine_inv_0_1_BLUE*jac_affine_inv_0_1_BLUE)*4.0; + const real_t tmp_moved_constant_13 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_1; + const real_t tmp_moved_constant_14 = (jac_affine_inv_1_0_BLUE*jac_affine_inv_1_0_BLUE)*4.0; + const real_t tmp_moved_constant_15 = (jac_affine_inv_1_1_BLUE*jac_affine_inv_1_1_BLUE)*4.0; + const real_t tmp_moved_constant_16 = jac_affine_inv_1_0_BLUE*8.0; + const real_t tmp_moved_constant_17 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_16; + const real_t tmp_moved_constant_18 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_0; + const real_t tmp_moved_constant_19 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_1; + const real_t tmp_moved_constant_20 = tmp_moved_constant_18 + tmp_moved_constant_19; + const real_t tmp_moved_constant_21 = jac_affine_inv_1_1_BLUE*8.0; + const real_t tmp_moved_constant_22 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_21; + const real_t tmp_moved_constant_23 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_0; + const real_t tmp_moved_constant_24 = -tmp_moved_constant_0 - tmp_moved_constant_16; + const real_t tmp_moved_constant_25 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_24 - tmp_moved_constant_23; + const real_t tmp_moved_constant_26 
= -tmp_moved_constant_21 - tmp_moved_constant_4; + const real_t tmp_moved_constant_27 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_26 - tmp_moved_constant_18; + const real_t tmp_moved_constant_28 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_24 - tmp_moved_constant_19; + const real_t tmp_moved_constant_29 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_4; + const real_t tmp_moved_constant_30 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_26 - tmp_moved_constant_29; + const real_t tmp_moved_constant_31 = jac_affine_inv_0_0_BLUE*-8.0 - tmp_moved_constant_1; + const real_t tmp_moved_constant_32 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_31 - tmp_moved_constant_23; + const real_t tmp_moved_constant_33 = jac_affine_inv_0_1_BLUE*-8.0 - tmp_moved_constant_5; + const real_t tmp_moved_constant_34 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_33 - tmp_moved_constant_19; + const real_t tmp_moved_constant_35 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_31 - tmp_moved_constant_18; + const real_t tmp_moved_constant_36 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_33 - tmp_moved_constant_29; + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d diffusivity_times_delta_dof_0 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d diffusivity_times_delta_dof_1 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d 
diffusivity_times_delta_dof_2 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d diffusivity_times_delta_dof_3 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d diffusivity_times_delta_dof_4 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d diffusivity_times_delta_dof_5 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d wx_dof_0 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d wx_dof_1 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d wx_dof_2 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d wx_dof_3 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d wx_dof_4 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d wx_dof_5 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d wy_dof_0 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d wy_dof_1 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d wy_dof_2 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d wy_dof_3 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d wy_dof_4 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d wy_dof_5 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_0); + const __m256d tmp_qloop_2 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_0); + const __m256d tmp_qloop_3 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_1_1),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(p_affine_0_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),p_affine_2_1),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),p_affine_0_1); + const __m256d tmp_qloop_4 = _mm256_mul_pd(tmp_qloop_3,tmp_qloop_3); + const __m256d tmp_qloop_5 = _mm256_add_pd(tmp_qloop_2,tmp_qloop_4); + const __m256d tmp_qloop_6 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_sqrt_pd(tmp_qloop_5)); + const __m256d tmp_qloop_13 = _mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_14 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_15 = _mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5)); + const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_0,rayVertex_0,rayVertex_0,rayVertex_0)),tmp_qloop_0),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(rayVertex_1,rayVertex_1,rayVertex_1,rayVertex_1)),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8))),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11)),_mm256_set_pd(radRayVertex,radRayVertex,radRayVertex,radRayVertex)); + const __m256d tmp_qloop_17 = _mm256_mul_pd(tmp_qloop_15,tmp_qloop_16); + const __m256d tmp_qloop_18 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(1.0,1.0,1.0,1.0)); + const __m256d tmp_qloop_19 = _mm256_mul_pd(tmp_qloop_13,_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + 
const __m256d tmp_qloop_20 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_18); + const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_15,_mm256_set_pd(tmp_qloop_12,tmp_qloop_12,tmp_qloop_12,tmp_qloop_12)); + const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)); + const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_2,tmp_qloop_24); + const __m256d tmp_qloop_26 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_16,_mm256_div_pd(_mm256_sqrt_pd(tmp_qloop_5),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_5,tmp_qloop_5),tmp_qloop_5))),_mm256_set_pd(3.0,3.0,3.0,3.0)); + const __m256d tmp_qloop_27 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_26),tmp_qloop_4); + const __m256d tmp_qloop_28 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_24,tmp_qloop_4)); + const __m256d tmp_qloop_29 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)); + const __m256d tmp_qloop_30 = _mm256_mul_pd(tmp_qloop_0,tmp_qloop_3); + const __m256d tmp_qloop_31 = _mm256_mul_pd(tmp_qloop_24,tmp_qloop_30); + const __m256d tmp_qloop_32 = _mm256_mul_pd(tmp_qloop_17,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_33 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_2,tmp_qloop_26),tmp_qloop_3); + const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_3)); + const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_29,tmp_qloop_30); + const __m256d tmp_qloop_36 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_23,tmp_qloop_30),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)); + const __m256d tmp_qloop_37 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_38 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + 
const __m256d tmp_qloop_39 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_37),tmp_qloop_38); + const __m256d tmp_qloop_40 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE))); + const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_43 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_45 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_44); + const __m256d tmp_qloop_46 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_47 = _mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_48 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_47); + const __m256d tmp_qloop_49 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_46,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_38); + const __m256d tmp_qloop_50 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_37); + const __m256d tmp_qloop_51 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_42),tmp_qloop_44),tmp_qloop_47); + const __m256d tmp_qloop_52 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,wx_dof_3),_mm256_mul_pd(tmp_qloop_45,wx_dof_1)),_mm256_mul_pd(tmp_qloop_48,wx_dof_2)),_mm256_mul_pd(tmp_qloop_49,wx_dof_4)),_mm256_mul_pd(tmp_qloop_50,wx_dof_5)),_mm256_mul_pd(tmp_qloop_51,wx_dof_0)); + const __m256d tmp_qloop_53 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_42,wy_dof_3),_mm256_mul_pd(tmp_qloop_45,wy_dof_1)),_mm256_mul_pd(tmp_qloop_48,wy_dof_2)),_mm256_mul_pd(tmp_qloop_49,wy_dof_4)),_mm256_mul_pd(tmp_qloop_50,wy_dof_5)),_mm256_mul_pd(tmp_qloop_51,wy_dof_0)); + const __m256d tmp_qloop_81 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_37); + const __m256d tmp_qloop_82 = _mm256_mul_pd(tmp_qloop_81,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)); + const __m256d tmp_qloop_83 = _mm256_mul_pd(tmp_qloop_81,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)); + const __m256d tmp_qloop_87 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_38); + const 
__m256d tmp_qloop_88 = _mm256_mul_pd(tmp_qloop_87,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)); + const __m256d tmp_qloop_89 = _mm256_mul_pd(tmp_qloop_87,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)); + const __m256d tmp_qloop_93 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)); + const __m256d tmp_qloop_94 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)); + const __m256d tmp_qloop_95 = _mm256_add_pd(tmp_qloop_93,tmp_qloop_94); + const __m256d tmp_qloop_96 = _mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)); + const __m256d tmp_qloop_97 = _mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)); + const __m256d tmp_qloop_98 = _mm256_add_pd(tmp_qloop_96,tmp_qloop_97); + const __m256d tmp_qloop_106 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_107 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_94,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_106,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE))); + const __m256d tmp_qloop_108 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_97,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_106,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))); + const __m256d tmp_qloop_117 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_118 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_93,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_117,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE))); + const __m256d tmp_qloop_119 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_96,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_117,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))); + const __m256d jac_blending_0_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_14),_mm256_mul_pd(tmp_qloop_18,tmp_qloop_4)); + const __m256d jac_blending_0_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_19),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_15),tmp_qloop_16),tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_0 = _mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_3),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d jac_blending_1_1 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_19,tmp_qloop_3),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_16),tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0))); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_mul_pd(jac_blending_0_0,jac_blending_1_1),_mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,jac_blending_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_22 = _mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),tmp_qloop_21); + const __m256d abs_det_jac_blending = tmp_qloop_21; + const __m256d tmp_qloop_80 = 
_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(abs_det_jac_blending,_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(diffusivity_times_delta_dof_0,tmp_qloop_51),_mm256_mul_pd(diffusivity_times_delta_dof_1,tmp_qloop_45)),_mm256_mul_pd(diffusivity_times_delta_dof_2,tmp_qloop_48)),_mm256_mul_pd(diffusivity_times_delta_dof_3,tmp_qloop_42)),_mm256_mul_pd(diffusivity_times_delta_dof_4,tmp_qloop_49)),_mm256_mul_pd(diffusivity_times_delta_dof_5,tmp_qloop_50))),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)); + const __m256d jac_blending_inv_0_0 = _mm256_mul_pd(jac_blending_1_1,tmp_qloop_22); + const __m256d jac_blending_inv_0_1 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_0_1,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_0 = _mm256_mul_pd(_mm256_mul_pd(jac_blending_1_0,tmp_qloop_22),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)); + const __m256d jac_blending_inv_1_1 = _mm256_mul_pd(jac_blending_0_0,tmp_qloop_22); + const __m256d hessian_blending_0_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),tmp_qloop_14),tmp_qloop_28); + const __m256d hessian_blending_1_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_29,tmp_qloop_4)),_mm256_mul_pd(tmp_qloop_3,tmp_qloop_32)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_3,tmp_qloop_3),tmp_qloop_3)),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d hessian_blending_0_0_1 = 
_mm256_add_pd(_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_30),_mm256_set_pd(-2.0,-2.0,-2.0,-2.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_11,tmp_qloop_11,tmp_qloop_11,tmp_qloop_11))); + const __m256d hessian_blending_1_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_28,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_6,_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_1,tmp_qloop_1,tmp_qloop_1,tmp_qloop_1)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d hessian_blending_0_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_31,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(tmp_qloop_34,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d tmp_qloop_64 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_0,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_0,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_72 = 
_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_0,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_0,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d hessian_blending_1_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_27),tmp_qloop_36); + const __m256d tmp_qloop_68 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_0,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_0,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_76 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_0,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_0,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d hessian_blending_0_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_32),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_0,tmp_qloop_0),tmp_qloop_0),tmp_qloop_26),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_25),tmp_qloop_36); + const __m256d tmp_qloop_65 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_1,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_1,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_66 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_64),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_65)); + const __m256d tmp_qloop_67 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_64),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_65)); + const __m256d tmp_qloop_73 = 
_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_0_1,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_0_1_1,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_74 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_72),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_73)); + const __m256d tmp_qloop_75 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_72),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_73)); + const __m256d hessian_blending_1_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_19,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_2),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_7,tmp_qloop_7,tmp_qloop_7,tmp_qloop_7)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_15,tmp_qloop_4),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_set_pd(tmp_qloop_10,tmp_qloop_10,tmp_qloop_10,tmp_qloop_10)),_mm256_set_pd(tmp_qloop_8,tmp_qloop_8,tmp_qloop_8,tmp_qloop_8)),_mm256_set_pd(tmp_qloop_9,tmp_qloop_9,tmp_qloop_9,tmp_qloop_9))); + const __m256d tmp_qloop_69 = _mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_1,jac_blending_inv_0_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_1,jac_blending_inv_1_0),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_70 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_69)); + const __m256d tmp_qloop_71 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_68),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_69)); + const __m256d tmp_qloop_77 = 
_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_0_1,jac_blending_inv_0_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(_mm256_mul_pd(hessian_blending_1_1_1,jac_blending_inv_1_1),_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))); + const __m256d tmp_qloop_78 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_76),_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_77)); + const __m256d tmp_qloop_79 = _mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_76),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_77)); + const __m256d q_tmp_0_0 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_41))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_40),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_41))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_9,tmp_moved_constant_9,tmp_moved_constant_9,tmp_moved_constant_9))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_
pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_9,tmp_moved_constant_9,tmp_moved_constant_9,tmp_moved_constant_9))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_41,tmp_qloop_67)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,tmp_qloop_70),_mm256_mul_pd(tmp_qloop_41,tmp_qloop_71)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,tmp_qloop_74),_mm256_mul_pd(tmp_qloop_41,tmp_qloop_75)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_40,tmp_qloop_78),_mm256_mul_pd(tmp_qloop_41,tmp_qloop_79))))); + const __m256d q_tmp_1_1 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_82),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_83))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_82),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_83))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_12,tmp_moved_constant_12,tmp_moved_constant_12,tmp_moved_constant_12)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_12,tmp_moved_constant_12,tmp_moved_constant_12,tmp_moved_constant_12))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd
(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10)),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11))))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10,tmp_moved_constant_10)),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,tmp_qloop_82),_mm256_mul_pd(tmp_qloop_67,tmp_qloop_83)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_70,tmp_qloop_82),_mm256_mul_pd(tmp_qloop_71,tmp_qloop_83)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_74,tmp_qloop_82),_mm256_mul_pd(tmp_qloop_75,tmp_qloop_83)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_78,tmp_qloop_82),_mm256_mul_pd(tmp_qloop_79,tmp_qloop_83))))); + const __m256d q_tmp_2_2 = 
_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_88),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_89))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_88),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_89))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_15,tmp_moved_constant_15,tmp_moved_constant_15,tmp_moved_constant_15)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_15,tmp_moved_constant_15,tmp_moved_constant_15,tmp_moved_constant_15))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13)),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14))))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13)),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,tmp_qloop_88),_mm256_mul_pd(tmp_qloop_67,tmp_qloop_89)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_70,tmp_qloop_88),_mm256_mul_pd(tmp_ql
oop_71,tmp_qloop_89)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_74,tmp_qloop_88),_mm256_mul_pd(tmp_qloop_75,tmp_qloop_89)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_78,tmp_qloop_88),_mm256_mul_pd(tmp_qloop_79,tmp_qloop_89))))); + const __m256d q_tmp_3_3 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_95),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_98))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_95),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_98))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_17,tmp_moved_constant_17,tmp_moved_constant_17,tmp_moved_constant_17)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_17,tmp_moved_constant_17,tmp_moved_constant_17,tmp_moved_constant_17)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_22,tmp_moved_constant_22,tmp_moved_constant_22,tmp_moved_constant_22))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20,tmp_moved_constant_20)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_const
ant_22,tmp_moved_constant_22,tmp_moved_constant_22,tmp_moved_constant_22))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_66,tmp_qloop_95),_mm256_mul_pd(tmp_qloop_67,tmp_qloop_98)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_70,tmp_qloop_95),_mm256_mul_pd(tmp_qloop_71,tmp_qloop_98)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_74,tmp_qloop_95),_mm256_mul_pd(tmp_qloop_75,tmp_qloop_98)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_78,tmp_qloop_95),_mm256_mul_pd(tmp_qloop_79,tmp_qloop_98))))); + const __m256d q_tmp_4_4 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_107),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_108))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_107),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_108))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_25,tmp_moved_constant_25,tmp_moved_constant_25,tmp_moved_constant_25)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_27,tmp_moved_constant_27,tmp_moved_constant_27,tmp_moved_constant_27)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_25,tmp_moved_constant_25,tmp_moved_constant_25,tmp_moved_constant_25)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_27,tmp_moved_constant_27,tmp_moved_constant_27,tmp_moved_constant_27))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_28,tmp_moved_constant_28,tmp_moved_constant_28,tmp_moved_constant_28)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_
30,tmp_moved_constant_30,tmp_moved_constant_30,tmp_moved_constant_30))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_28,tmp_moved_constant_28,tmp_moved_constant_28,tmp_moved_constant_28)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_30,tmp_moved_constant_30,tmp_moved_constant_30,tmp_moved_constant_30))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_108,tmp_qloop_67)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_70),_mm256_mul_pd(tmp_qloop_108,tmp_qloop_71)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_74),_mm256_mul_pd(tmp_qloop_108,tmp_qloop_75)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_107,tmp_qloop_78),_mm256_mul_pd(tmp_qloop_108,tmp_qloop_79))))); + const __m256d q_tmp_5_5 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_80,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_52,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,tmp_qloop_118),_mm256_mul_pd(jac_blending_inv_1_0,tmp_qloop_119))),_mm256_mul_pd(tmp_qloop_53,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,tmp_qloop_118),_mm256_mul_pd(jac_blending_inv_1_1,tmp_qloop_119))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_32,tmp_moved_constant_32,tmp_moved_constant_32,tmp_moved_constant_32)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_34,tmp_moved_constant_34,tmp_moved_constant_34,tmp_moved_constant_34)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_32,tmp_moved_constant_32,tmp_moved_constant_32,tmp_moved_constant_32)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_const
ant_34,tmp_moved_constant_34,tmp_moved_constant_34,tmp_moved_constant_34))))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_0,_mm256_set_pd(tmp_moved_constant_35,tmp_moved_constant_35,tmp_moved_constant_35,tmp_moved_constant_35)),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_set_pd(tmp_moved_constant_36,tmp_moved_constant_36,tmp_moved_constant_36,tmp_moved_constant_36))))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(jac_blending_inv_0_1,_mm256_set_pd(tmp_moved_constant_35,tmp_moved_constant_35,tmp_moved_constant_35,tmp_moved_constant_35)),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_set_pd(tmp_moved_constant_36,tmp_moved_constant_36,tmp_moved_constant_36,tmp_moved_constant_36))))),_mm256_mul_pd(jac_blending_inv_0_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_118,tmp_qloop_66),_mm256_mul_pd(tmp_qloop_119,tmp_qloop_67)))),_mm256_mul_pd(jac_blending_inv_0_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_118,tmp_qloop_70),_mm256_mul_pd(tmp_qloop_119,tmp_qloop_71)))),_mm256_mul_pd(jac_blending_inv_1_0,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_118,tmp_qloop_74),_mm256_mul_pd(tmp_qloop_119,tmp_qloop_75)))),_mm256_mul_pd(jac_blending_inv_1_1,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_118,tmp_qloop_78),_mm256_mul_pd(tmp_qloop_119,tmp_qloop_79))))); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4); + q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5); + } + const __m256d elMatDiag_0 = q_acc_0_0; + const __m256d elMatDiag_1 = q_acc_1_1; + const __m256d elMatDiag_2 = q_acc_2_2; + const __m256d elMatDiag_3 = q_acc_3_3; + const __m256d elMatDiag_4 = q_acc_4_4; + const __m256d elMatDiag_5 = q_acc_5_5; + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 
1],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 
+= 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + 
macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / 
(2)) + 1]; + const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wx_dof_1 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t wx_dof_3 = _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wy_dof_1 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t wy_dof_3 = _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 
((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = 
tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0; + const real_t tmp_qloop_40 = jac_affine_inv_0_0_BLUE*tmp_qloop_39 + jac_affine_inv_1_0_BLUE*tmp_qloop_39; + const real_t tmp_qloop_41 = jac_affine_inv_0_1_BLUE*tmp_qloop_39 + jac_affine_inv_1_1_BLUE*tmp_qloop_39; + const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_44 = tmp_qloop_43*2.0; + const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q]; + const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_47 = tmp_qloop_46*2.0; + const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q]; + const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0; + const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0; + const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_52 = tmp_qloop_42*wx_dof_3 + tmp_qloop_45*wx_dof_1 + tmp_qloop_48*wx_dof_2 + tmp_qloop_49*wx_dof_4 + tmp_qloop_50*wx_dof_5 + tmp_qloop_51*wx_dof_0; + const real_t tmp_qloop_53 = tmp_qloop_42*wy_dof_3 + tmp_qloop_45*wy_dof_1 + tmp_qloop_48*wy_dof_2 + tmp_qloop_49*wy_dof_4 + tmp_qloop_50*wy_dof_5 + tmp_qloop_51*wy_dof_0; + const real_t tmp_qloop_81 = tmp_qloop_37 - 1.0; + const real_t tmp_qloop_82 = jac_affine_inv_0_0_BLUE*tmp_qloop_81; + const real_t tmp_qloop_83 = jac_affine_inv_0_1_BLUE*tmp_qloop_81; + const real_t tmp_qloop_87 = tmp_qloop_38 - 1.0; + const real_t tmp_qloop_88 = jac_affine_inv_1_0_BLUE*tmp_qloop_87; + const real_t tmp_qloop_89 = 
jac_affine_inv_1_1_BLUE*tmp_qloop_87; + const real_t tmp_qloop_93 = jac_affine_inv_1_0_BLUE*tmp_qloop_37; + const real_t tmp_qloop_94 = jac_affine_inv_0_0_BLUE*tmp_qloop_38; + const real_t tmp_qloop_95 = tmp_qloop_93 + tmp_qloop_94; + const real_t tmp_qloop_96 = jac_affine_inv_1_1_BLUE*tmp_qloop_37; + const real_t tmp_qloop_97 = jac_affine_inv_0_1_BLUE*tmp_qloop_38; + const real_t tmp_qloop_98 = tmp_qloop_96 + tmp_qloop_97; + const real_t tmp_qloop_106 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_107 = jac_affine_inv_1_0_BLUE*tmp_qloop_106 - tmp_qloop_94; + const real_t tmp_qloop_108 = jac_affine_inv_1_1_BLUE*tmp_qloop_106 - tmp_qloop_97; + const real_t tmp_qloop_117 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_118 = jac_affine_inv_0_0_BLUE*tmp_qloop_117 - tmp_qloop_93; + const real_t tmp_qloop_119 = jac_affine_inv_0_1_BLUE*tmp_qloop_117 - tmp_qloop_96; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_80 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(diffusivity_times_delta_dof_0*tmp_qloop_51 + diffusivity_times_delta_dof_1*tmp_qloop_45 + diffusivity_times_delta_dof_2*tmp_qloop_48 + diffusivity_times_delta_dof_3*tmp_qloop_42 + diffusivity_times_delta_dof_4*tmp_qloop_49 + diffusivity_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q]; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = 
-jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t tmp_qloop_64 = -hessian_blending_0_0_0*jac_blending_inv_0_0 - hessian_blending_0_1_0*jac_blending_inv_1_0; + const real_t tmp_qloop_72 = -hessian_blending_0_0_0*jac_blending_inv_0_1 - hessian_blending_0_1_0*jac_blending_inv_1_1; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t tmp_qloop_68 = -hessian_blending_1_0_0*jac_blending_inv_0_0 - hessian_blending_1_1_0*jac_blending_inv_1_0; + const real_t tmp_qloop_76 = -hessian_blending_1_0_0*jac_blending_inv_0_1 - hessian_blending_1_1_0*jac_blending_inv_1_1; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t tmp_qloop_65 = -hessian_blending_0_0_1*jac_blending_inv_0_0 - hessian_blending_0_1_1*jac_blending_inv_1_0; + const real_t tmp_qloop_66 = jac_blending_inv_0_0*tmp_qloop_64 + jac_blending_inv_0_1*tmp_qloop_65; + const real_t tmp_qloop_67 = jac_blending_inv_1_0*tmp_qloop_64 + jac_blending_inv_1_1*tmp_qloop_65; + const real_t tmp_qloop_73 = -hessian_blending_0_0_1*jac_blending_inv_0_1 - 
hessian_blending_0_1_1*jac_blending_inv_1_1; + const real_t tmp_qloop_74 = jac_blending_inv_0_0*tmp_qloop_72 + jac_blending_inv_0_1*tmp_qloop_73; + const real_t tmp_qloop_75 = jac_blending_inv_1_0*tmp_qloop_72 + jac_blending_inv_1_1*tmp_qloop_73; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t tmp_qloop_69 = -hessian_blending_1_0_1*jac_blending_inv_0_0 - hessian_blending_1_1_1*jac_blending_inv_1_0; + const real_t tmp_qloop_70 = jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_0_1*tmp_qloop_69; + const real_t tmp_qloop_71 = jac_blending_inv_1_0*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69; + const real_t tmp_qloop_77 = -hessian_blending_1_0_1*jac_blending_inv_0_1 - hessian_blending_1_1_1*jac_blending_inv_1_1; + const real_t tmp_qloop_78 = jac_blending_inv_0_0*tmp_qloop_76 + jac_blending_inv_0_1*tmp_qloop_77; + const real_t tmp_qloop_79 = jac_blending_inv_1_0*tmp_qloop_76 + jac_blending_inv_1_1*tmp_qloop_77; + const real_t q_tmp_0_0 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_3 + jac_blending_inv_1_0*tmp_moved_constant_7) + jac_blending_inv_0_0*(tmp_qloop_40*tmp_qloop_66 + tmp_qloop_41*tmp_qloop_67) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_3 + jac_blending_inv_1_1*tmp_moved_constant_7) + jac_blending_inv_0_1*(tmp_qloop_40*tmp_qloop_70 + tmp_qloop_41*tmp_qloop_71) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_8 + jac_blending_inv_1_0*tmp_moved_constant_9) + jac_blending_inv_1_0*(tmp_qloop_40*tmp_qloop_74 + tmp_qloop_41*tmp_qloop_75) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_8 + jac_blending_inv_1_1*tmp_moved_constant_9) + 
jac_blending_inv_1_1*(tmp_qloop_40*tmp_qloop_78 + tmp_qloop_41*tmp_qloop_79)); + const real_t q_tmp_1_1 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_82 + jac_blending_inv_1_0*tmp_qloop_83) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_82 + jac_blending_inv_1_1*tmp_qloop_83))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_11 + jac_blending_inv_1_0*tmp_moved_constant_10) + jac_blending_inv_0_0*(tmp_qloop_66*tmp_qloop_82 + tmp_qloop_67*tmp_qloop_83) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_11 + jac_blending_inv_1_1*tmp_moved_constant_10) + jac_blending_inv_0_1*(tmp_qloop_70*tmp_qloop_82 + tmp_qloop_71*tmp_qloop_83) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_10 + jac_blending_inv_1_0*tmp_moved_constant_12) + jac_blending_inv_1_0*(tmp_qloop_74*tmp_qloop_82 + tmp_qloop_75*tmp_qloop_83) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_10 + jac_blending_inv_1_1*tmp_moved_constant_12) + jac_blending_inv_1_1*(tmp_qloop_78*tmp_qloop_82 + tmp_qloop_79*tmp_qloop_83)); + const real_t q_tmp_2_2 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_88 + jac_blending_inv_1_0*tmp_qloop_89) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_88 + jac_blending_inv_1_1*tmp_qloop_89))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_14 + jac_blending_inv_1_0*tmp_moved_constant_13) + jac_blending_inv_0_0*(tmp_qloop_66*tmp_qloop_88 + tmp_qloop_67*tmp_qloop_89) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_14 + jac_blending_inv_1_1*tmp_moved_constant_13) + jac_blending_inv_0_1*(tmp_qloop_70*tmp_qloop_88 + tmp_qloop_71*tmp_qloop_89) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_13 + jac_blending_inv_1_0*tmp_moved_constant_15) + jac_blending_inv_1_0*(tmp_qloop_74*tmp_qloop_88 + tmp_qloop_75*tmp_qloop_89) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_13 + jac_blending_inv_1_1*tmp_moved_constant_15) + 
jac_blending_inv_1_1*(tmp_qloop_78*tmp_qloop_88 + tmp_qloop_79*tmp_qloop_89)); + const real_t q_tmp_3_3 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_95 + jac_blending_inv_1_0*tmp_qloop_98) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_95 + jac_blending_inv_1_1*tmp_qloop_98))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_17 + jac_blending_inv_1_0*tmp_moved_constant_20) + jac_blending_inv_0_0*(tmp_qloop_66*tmp_qloop_95 + tmp_qloop_67*tmp_qloop_98) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_17 + jac_blending_inv_1_1*tmp_moved_constant_20) + jac_blending_inv_0_1*(tmp_qloop_70*tmp_qloop_95 + tmp_qloop_71*tmp_qloop_98) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_20 + jac_blending_inv_1_0*tmp_moved_constant_22) + jac_blending_inv_1_0*(tmp_qloop_74*tmp_qloop_95 + tmp_qloop_75*tmp_qloop_98) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_20 + jac_blending_inv_1_1*tmp_moved_constant_22) + jac_blending_inv_1_1*(tmp_qloop_78*tmp_qloop_95 + tmp_qloop_79*tmp_qloop_98)); + const real_t q_tmp_4_4 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_107 + jac_blending_inv_1_0*tmp_qloop_108) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_107 + jac_blending_inv_1_1*tmp_qloop_108))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_25 + jac_blending_inv_1_0*tmp_moved_constant_27) + jac_blending_inv_0_0*(tmp_qloop_107*tmp_qloop_66 + tmp_qloop_108*tmp_qloop_67) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_25 + jac_blending_inv_1_1*tmp_moved_constant_27) + jac_blending_inv_0_1*(tmp_qloop_107*tmp_qloop_70 + tmp_qloop_108*tmp_qloop_71) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_28 + jac_blending_inv_1_0*tmp_moved_constant_30) + jac_blending_inv_1_0*(tmp_qloop_107*tmp_qloop_74 + tmp_qloop_108*tmp_qloop_75) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_28 + jac_blending_inv_1_1*tmp_moved_constant_30) + 
jac_blending_inv_1_1*(tmp_qloop_107*tmp_qloop_78 + tmp_qloop_108*tmp_qloop_79)); + const real_t q_tmp_5_5 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_118 + jac_blending_inv_1_0*tmp_qloop_119) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_118 + jac_blending_inv_1_1*tmp_qloop_119))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_32 + jac_blending_inv_1_0*tmp_moved_constant_34) + jac_blending_inv_0_0*(tmp_qloop_118*tmp_qloop_66 + tmp_qloop_119*tmp_qloop_67) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_32 + jac_blending_inv_1_1*tmp_moved_constant_34) + jac_blending_inv_0_1*(tmp_qloop_118*tmp_qloop_70 + tmp_qloop_119*tmp_qloop_71) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_35 + jac_blending_inv_1_0*tmp_moved_constant_36) + jac_blending_inv_1_0*(tmp_qloop_118*tmp_qloop_74 + tmp_qloop_119*tmp_qloop_75) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_35 + jac_blending_inv_1_1*tmp_moved_constant_36) + jac_blending_inv_1_1*(tmp_qloop_118*tmp_qloop_78 + tmp_qloop_119*tmp_qloop_79)); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatDiag_0 = q_acc_0_0; + const real_t elMatDiag_1 = q_acc_1_1; + const real_t elMatDiag_2 = q_acc_2_2; + const real_t elMatDiag_3 = q_acc_3_3; + const real_t elMatDiag_4 = q_acc_4_4; + const real_t elMatDiag_5 = q_acc_5_5; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) 
/ (2))]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + } + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/supg_diffusion/avx/P2ElementwiseSupgDiffusion_apply_P2ElementwiseSupgDiffusion_macro_2D.cpp b/operators/supg_diffusion/avx/P2ElementwiseSupgDiffusion_apply_P2ElementwiseSupgDiffusion_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..47499674a8b8e1e90de09f928f8db54e1f907445 --- /dev/null +++ b/operators/supg_diffusion/avx/P2ElementwiseSupgDiffusion_apply_P2ElementwiseSupgDiffusion_macro_2D.cpp @@ -0,0 +1,954 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). 
+* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseSupgDiffusion.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseSupgDiffusion::apply_P2ElementwiseSupgDiffusion_macro_2D( real_t * RESTRICT _data_diffusivity_times_deltaEdge, real_t * RESTRICT _data_diffusivity_times_deltaVertex, real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_wxEdge, real_t * RESTRICT _data_wxVertex, real_t * RESTRICT _data_wyEdge, real_t * RESTRICT _data_wyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const 
real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + const real_t tmp_qloop_0 = jac_affine_inv_0_0_GRAY*4.0; + const real_t 
tmp_qloop_1 = jac_affine_inv_1_0_GRAY*4.0; + const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1; + const real_t tmp_qloop_3 = jac_affine_inv_0_1_GRAY*4.0; + const real_t tmp_qloop_4 = jac_affine_inv_1_1_GRAY*4.0; + const real_t tmp_qloop_5 = tmp_qloop_3 + tmp_qloop_4; + const real_t tmp_qloop_6 = jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_0_1_GRAY*tmp_qloop_5 + jac_affine_inv_1_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_5; + const real_t tmp_qloop_24 = (jac_affine_inv_0_0_GRAY*jac_affine_inv_0_0_GRAY)*4.0 + (jac_affine_inv_0_1_GRAY*jac_affine_inv_0_1_GRAY)*4.0; + const real_t tmp_qloop_25 = (jac_affine_inv_1_0_GRAY*jac_affine_inv_1_0_GRAY)*4.0 + (jac_affine_inv_1_1_GRAY*jac_affine_inv_1_1_GRAY)*4.0; + const real_t tmp_qloop_26 = jac_affine_inv_1_0_GRAY*8.0; + const real_t tmp_qloop_27 = jac_affine_inv_1_1_GRAY*8.0; + const real_t tmp_qloop_28 = jac_affine_inv_0_0_GRAY*tmp_qloop_26 + jac_affine_inv_0_1_GRAY*tmp_qloop_27; + const real_t tmp_qloop_29 = jac_affine_inv_1_0_GRAY*tmp_qloop_0 + jac_affine_inv_1_1_GRAY*tmp_qloop_3; + const real_t tmp_qloop_30 = jac_affine_inv_1_0_GRAY*(-tmp_qloop_0 - tmp_qloop_26) + jac_affine_inv_1_1_GRAY*(-tmp_qloop_27 - tmp_qloop_3) - tmp_qloop_29; + const real_t tmp_qloop_31 = jac_affine_inv_0_0_GRAY*(jac_affine_inv_0_0_GRAY*-8.0 - tmp_qloop_1) + jac_affine_inv_0_1_GRAY*(jac_affine_inv_0_1_GRAY*-8.0 - tmp_qloop_4) - tmp_qloop_29; + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + 
_data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& 
_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d diffusivity_times_delta_dof_0 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d diffusivity_times_delta_dof_1 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d diffusivity_times_delta_dof_2 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d diffusivity_times_delta_dof_3 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d diffusivity_times_delta_dof_4 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d diffusivity_times_delta_dof_5 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d wx_dof_0 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d wx_dof_1 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d wx_dof_2 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d wx_dof_3 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d wx_dof_4 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + 
const __m256d wx_dof_5 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d wy_dof_0 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d wy_dof_1 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d wy_dof_2 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d wy_dof_3 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d wy_dof_4 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d wy_dof_5 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + 
__m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_8 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_7),tmp_qloop_8); + const __m256d tmp_qloop_10 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_11 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_12 = _mm256_mul_pd(tmp_qloop_11,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_13 = 
_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_12); + const __m256d tmp_qloop_14 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_15 = _mm256_mul_pd(tmp_qloop_14,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_15); + const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8); + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_11,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_7); + const __m256d tmp_qloop_19 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_10),tmp_qloop_12),tmp_qloop_15); + const __m256d tmp_qloop_20 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,wx_dof_3),_mm256_mul_pd(tmp_qloop_13,wx_dof_1)),_mm256_mul_pd(tmp_qloop_16,wx_dof_2)),_mm256_mul_pd(tmp_qloop_17,wx_dof_4)),_mm256_mul_pd(tmp_qloop_18,wx_dof_5)),_mm256_mul_pd(tmp_qloop_19,wx_dof_0)); + const __m256d tmp_qloop_21 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,wy_dof_3),_mm256_mul_pd(tmp_qloop_13,wy_dof_1)),_mm256_mul_pd(tmp_qloop_16,wy_dof_2)),_mm256_mul_pd(tmp_qloop_17,wy_dof_4)),_mm256_mul_pd(tmp_qloop_18,wy_dof_5)),_mm256_mul_pd(tmp_qloop_19,wy_dof_0)); + const __m256d tmp_qloop_22 = _mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(diffusivity_times_delta_dof_0,tmp_qloop_19),_mm256_mul_pd(diffusivity_times_delta_dof_1,tmp_qloop_13)),_mm256_mul_pd(diffusivity_times_delta_dof_2,tmp_qloop_16)),_mm256_mul_pd(diffusivity_times_delta_dof_3,tmp_qloop_10)),_mm256_mul_pd(diffusivity_times_delta_dof_4,tmp_qloop_17)),_mm256_mul_pd(diffusivity_times_delta_dof_5,tmp_qloop_18)),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)); + const __m256d tmp_qloop_23 = _mm256_mul_pd(tmp_qloop_22,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)))))); + const __m256d tmp_qloop_32 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_7); + const __m256d tmp_qloop_33 = 
_mm256_mul_pd(tmp_qloop_22,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_32),_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,tmp_qloop_32),_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)))); + const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_8); + const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_22,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_34),_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,tmp_qloop_34),_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)))); + const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)); + const __m256d tmp_qloop_37 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)); + const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)); + const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_22,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_add_pd(tmp_qloop_36,tmp_qloop_37)),_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(tmp_qloop_38,tmp_qloop_39)))); + const __m256d tmp_qloop_41 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_22,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)))))); + const __m256d tmp_qloop_43 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_44 = _mm256_mul_pd(tmp_qloop_22,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)))),_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)))))); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6)); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_24,tmp_qloop_24,tmp_qloop_24,tmp_qloop_24)); + const __m256d q_tmp_0_2 = 
_mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_25,tmp_qloop_25,tmp_qloop_25,tmp_qloop_25)); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_28,tmp_qloop_28,tmp_qloop_28,tmp_qloop_28)); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_30,tmp_qloop_30,tmp_qloop_30,tmp_qloop_30)); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_qloop_31,tmp_qloop_31,tmp_qloop_31,tmp_qloop_31)); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6)); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(tmp_qloop_24,tmp_qloop_24,tmp_qloop_24,tmp_qloop_24)); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(tmp_qloop_25,tmp_qloop_25,tmp_qloop_25,tmp_qloop_25)); + const __m256d q_tmp_1_3 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(tmp_qloop_28,tmp_qloop_28,tmp_qloop_28,tmp_qloop_28)); + const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(tmp_qloop_30,tmp_qloop_30,tmp_qloop_30,tmp_qloop_30)); + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(tmp_qloop_31,tmp_qloop_31,tmp_qloop_31,tmp_qloop_31)); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6)); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(tmp_qloop_24,tmp_qloop_24,tmp_qloop_24,tmp_qloop_24)); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(tmp_qloop_25,tmp_qloop_25,tmp_qloop_25,tmp_qloop_25)); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(tmp_qloop_28,tmp_qloop_28,tmp_qloop_28,tmp_qloop_28)); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(tmp_qloop_30,tmp_qloop_30,tmp_qloop_30,tmp_qloop_30)); + const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(tmp_qloop_31,tmp_qloop_31,tmp_qloop_31,tmp_qloop_31)); + const __m256d q_tmp_3_0 = 
_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6)); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(tmp_qloop_24,tmp_qloop_24,tmp_qloop_24,tmp_qloop_24)); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(tmp_qloop_25,tmp_qloop_25,tmp_qloop_25,tmp_qloop_25)); + const __m256d q_tmp_3_3 = _mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(tmp_qloop_28,tmp_qloop_28,tmp_qloop_28,tmp_qloop_28)); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(tmp_qloop_30,tmp_qloop_30,tmp_qloop_30,tmp_qloop_30)); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(tmp_qloop_31,tmp_qloop_31,tmp_qloop_31,tmp_qloop_31)); + const __m256d q_tmp_4_0 = _mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6)); + const __m256d q_tmp_4_1 = _mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(tmp_qloop_24,tmp_qloop_24,tmp_qloop_24,tmp_qloop_24)); + const __m256d q_tmp_4_2 = _mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(tmp_qloop_25,tmp_qloop_25,tmp_qloop_25,tmp_qloop_25)); + const __m256d q_tmp_4_3 = _mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(tmp_qloop_28,tmp_qloop_28,tmp_qloop_28,tmp_qloop_28)); + const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(tmp_qloop_30,tmp_qloop_30,tmp_qloop_30,tmp_qloop_30)); + const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(tmp_qloop_31,tmp_qloop_31,tmp_qloop_31,tmp_qloop_31)); + const __m256d q_tmp_5_0 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(tmp_qloop_6,tmp_qloop_6,tmp_qloop_6,tmp_qloop_6)); + const __m256d q_tmp_5_1 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(tmp_qloop_24,tmp_qloop_24,tmp_qloop_24,tmp_qloop_24)); + const __m256d q_tmp_5_2 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(tmp_qloop_25,tmp_qloop_25,tmp_qloop_25,tmp_qloop_25)); + const __m256d q_tmp_5_3 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(tmp_qloop_28,tmp_qloop_28,tmp_qloop_28,tmp_qloop_28)); + const __m256d q_tmp_5_4 = 
_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(tmp_qloop_30,tmp_qloop_30,tmp_qloop_30,tmp_qloop_30)); + const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(tmp_qloop_31,tmp_qloop_31,tmp_qloop_31,tmp_qloop_31)); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0); + q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1); + q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2); + q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3); + q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4); + q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5); + q_acc_5_0 = _mm256_add_pd(q_acc_5_0,q_tmp_5_0); + q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1); + q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2); + q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3); + q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4); + q_acc_5_5 = 
_mm256_add_pd(q_acc_5_5,q_tmp_5_5); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)); + const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5)); + const __m256d elMatVec_5 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5)); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t 
_data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / 
(micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) 
/ (2))]; + const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wx_dof_1 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_3 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wy_dof_1 = 
_data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_3 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_7 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_8 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_9 = tmp_qloop_7 + tmp_qloop_8 - 3.0; + const real_t tmp_qloop_10 = tmp_qloop_7*_data_q_p_1[q]; + const real_t tmp_qloop_11 = (_data_q_p_0[q]*_data_q_p_0[q]); + const 
real_t tmp_qloop_12 = tmp_qloop_11*2.0; + const real_t tmp_qloop_13 = tmp_qloop_12 - _data_q_p_0[q]; + const real_t tmp_qloop_14 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_15 = tmp_qloop_14*2.0; + const real_t tmp_qloop_16 = tmp_qloop_15 - _data_q_p_1[q]; + const real_t tmp_qloop_17 = -tmp_qloop_10 + tmp_qloop_14*-4.0 + tmp_qloop_8; + const real_t tmp_qloop_18 = -tmp_qloop_10 + tmp_qloop_11*-4.0 + tmp_qloop_7; + const real_t tmp_qloop_19 = tmp_qloop_10 + tmp_qloop_12 + tmp_qloop_15 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_20 = tmp_qloop_10*wx_dof_3 + tmp_qloop_13*wx_dof_1 + tmp_qloop_16*wx_dof_2 + tmp_qloop_17*wx_dof_4 + tmp_qloop_18*wx_dof_5 + tmp_qloop_19*wx_dof_0; + const real_t tmp_qloop_21 = tmp_qloop_10*wy_dof_3 + tmp_qloop_13*wy_dof_1 + tmp_qloop_16*wy_dof_2 + tmp_qloop_17*wy_dof_4 + tmp_qloop_18*wy_dof_5 + tmp_qloop_19*wy_dof_0; + const real_t tmp_qloop_22 = abs_det_jac_affine_GRAY*(diffusivity_times_delta_dof_0*tmp_qloop_19 + diffusivity_times_delta_dof_1*tmp_qloop_13 + diffusivity_times_delta_dof_2*tmp_qloop_16 + diffusivity_times_delta_dof_3*tmp_qloop_10 + diffusivity_times_delta_dof_4*tmp_qloop_17 + diffusivity_times_delta_dof_5*tmp_qloop_18)*_data_q_w[q]; + const real_t tmp_qloop_23 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_0_0_GRAY*tmp_qloop_9 + jac_affine_inv_1_0_GRAY*tmp_qloop_9) + tmp_qloop_21*(jac_affine_inv_0_1_GRAY*tmp_qloop_9 + jac_affine_inv_1_1_GRAY*tmp_qloop_9)); + const real_t tmp_qloop_32 = tmp_qloop_7 - 1.0; + const real_t tmp_qloop_33 = tmp_qloop_22*(jac_affine_inv_0_0_GRAY*tmp_qloop_20*tmp_qloop_32 + jac_affine_inv_0_1_GRAY*tmp_qloop_21*tmp_qloop_32); + const real_t tmp_qloop_34 = tmp_qloop_8 - 1.0; + const real_t tmp_qloop_35 = tmp_qloop_22*(jac_affine_inv_1_0_GRAY*tmp_qloop_20*tmp_qloop_34 + jac_affine_inv_1_1_GRAY*tmp_qloop_21*tmp_qloop_34); + const real_t tmp_qloop_36 = jac_affine_inv_1_0_GRAY*tmp_qloop_7; + const real_t tmp_qloop_37 = jac_affine_inv_0_0_GRAY*tmp_qloop_8; + const 
real_t tmp_qloop_38 = jac_affine_inv_1_1_GRAY*tmp_qloop_7; + const real_t tmp_qloop_39 = jac_affine_inv_0_1_GRAY*tmp_qloop_8; + const real_t tmp_qloop_40 = tmp_qloop_22*(tmp_qloop_20*(tmp_qloop_36 + tmp_qloop_37) + tmp_qloop_21*(tmp_qloop_38 + tmp_qloop_39)); + const real_t tmp_qloop_41 = -tmp_qloop_7 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_42 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_1_0_GRAY*tmp_qloop_41 - tmp_qloop_37) + tmp_qloop_21*(jac_affine_inv_1_1_GRAY*tmp_qloop_41 - tmp_qloop_39)); + const real_t tmp_qloop_43 = -tmp_qloop_8 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_44 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_0_0_GRAY*tmp_qloop_43 - tmp_qloop_36) + tmp_qloop_21*(jac_affine_inv_0_1_GRAY*tmp_qloop_43 - tmp_qloop_38)); + const real_t q_tmp_0_0 = tmp_qloop_23*tmp_qloop_6; + const real_t q_tmp_0_1 = tmp_qloop_23*tmp_qloop_24; + const real_t q_tmp_0_2 = tmp_qloop_23*tmp_qloop_25; + const real_t q_tmp_0_3 = tmp_qloop_23*tmp_qloop_28; + const real_t q_tmp_0_4 = tmp_qloop_23*tmp_qloop_30; + const real_t q_tmp_0_5 = tmp_qloop_23*tmp_qloop_31; + const real_t q_tmp_1_0 = tmp_qloop_33*tmp_qloop_6; + const real_t q_tmp_1_1 = tmp_qloop_24*tmp_qloop_33; + const real_t q_tmp_1_2 = tmp_qloop_25*tmp_qloop_33; + const real_t q_tmp_1_3 = tmp_qloop_28*tmp_qloop_33; + const real_t q_tmp_1_4 = tmp_qloop_30*tmp_qloop_33; + const real_t q_tmp_1_5 = tmp_qloop_31*tmp_qloop_33; + const real_t q_tmp_2_0 = tmp_qloop_35*tmp_qloop_6; + const real_t q_tmp_2_1 = tmp_qloop_24*tmp_qloop_35; + const real_t q_tmp_2_2 = tmp_qloop_25*tmp_qloop_35; + const real_t q_tmp_2_3 = tmp_qloop_28*tmp_qloop_35; + const real_t q_tmp_2_4 = tmp_qloop_30*tmp_qloop_35; + const real_t q_tmp_2_5 = tmp_qloop_31*tmp_qloop_35; + const real_t q_tmp_3_0 = tmp_qloop_40*tmp_qloop_6; + const real_t q_tmp_3_1 = tmp_qloop_24*tmp_qloop_40; + const real_t q_tmp_3_2 = tmp_qloop_25*tmp_qloop_40; + const real_t q_tmp_3_3 = tmp_qloop_28*tmp_qloop_40; + const real_t q_tmp_3_4 = 
tmp_qloop_30*tmp_qloop_40; + const real_t q_tmp_3_5 = tmp_qloop_31*tmp_qloop_40; + const real_t q_tmp_4_0 = tmp_qloop_42*tmp_qloop_6; + const real_t q_tmp_4_1 = tmp_qloop_24*tmp_qloop_42; + const real_t q_tmp_4_2 = tmp_qloop_25*tmp_qloop_42; + const real_t q_tmp_4_3 = tmp_qloop_28*tmp_qloop_42; + const real_t q_tmp_4_4 = tmp_qloop_30*tmp_qloop_42; + const real_t q_tmp_4_5 = tmp_qloop_31*tmp_qloop_42; + const real_t q_tmp_5_0 = tmp_qloop_44*tmp_qloop_6; + const real_t q_tmp_5_1 = tmp_qloop_24*tmp_qloop_44; + const real_t q_tmp_5_2 = tmp_qloop_25*tmp_qloop_44; + const real_t q_tmp_5_3 = tmp_qloop_28*tmp_qloop_44; + const real_t q_tmp_5_4 = tmp_qloop_30*tmp_qloop_44; + const real_t q_tmp_5_5 = tmp_qloop_31*tmp_qloop_44; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = 
q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5; + const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5; + const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge 
+ 1)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + } + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const 
real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + const real_t tmp_moved_constant_0 = jac_affine_inv_0_0_BLUE*4.0; + const real_t tmp_moved_constant_1 = jac_affine_inv_1_0_BLUE*4.0; + const real_t tmp_moved_constant_2 = tmp_moved_constant_0 + tmp_moved_constant_1; + const real_t tmp_moved_constant_3 = jac_affine_inv_0_1_BLUE*4.0; + const real_t tmp_moved_constant_4 = jac_affine_inv_1_1_BLUE*4.0; + const real_t tmp_moved_constant_5 = tmp_moved_constant_3 + tmp_moved_constant_4; + const real_t tmp_moved_constant_6 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_0_1_BLUE*tmp_moved_constant_5 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_5; + const real_t tmp_moved_constant_7 = (jac_affine_inv_0_0_BLUE*jac_affine_inv_0_0_BLUE)*4.0 + (jac_affine_inv_0_1_BLUE*jac_affine_inv_0_1_BLUE)*4.0; + const real_t tmp_moved_constant_8 = (jac_affine_inv_1_0_BLUE*jac_affine_inv_1_0_BLUE)*4.0 + (jac_affine_inv_1_1_BLUE*jac_affine_inv_1_1_BLUE)*4.0; + const real_t tmp_moved_constant_9 = jac_affine_inv_1_0_BLUE*8.0; + const real_t tmp_moved_constant_10 = jac_affine_inv_1_1_BLUE*8.0; + const real_t tmp_moved_constant_11 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_9 + jac_affine_inv_0_1_BLUE*tmp_moved_constant_10; + const real_t tmp_moved_constant_12 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_0 + 
jac_affine_inv_1_1_BLUE*tmp_moved_constant_3; + const real_t tmp_moved_constant_13 = jac_affine_inv_1_0_BLUE*(-tmp_moved_constant_0 - tmp_moved_constant_9) + jac_affine_inv_1_1_BLUE*(-tmp_moved_constant_10 - tmp_moved_constant_3) - tmp_moved_constant_12; + const real_t tmp_moved_constant_14 = jac_affine_inv_0_0_BLUE*(jac_affine_inv_0_0_BLUE*-8.0 - tmp_moved_constant_1) + jac_affine_inv_0_1_BLUE*(jac_affine_inv_0_1_BLUE*-8.0 - tmp_moved_constant_4) - tmp_moved_constant_12; + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d src_dof_0 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d src_dof_1 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_2 = _mm256_loadu_pd(& _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d src_dof_3 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d src_dof_4 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d src_dof_5 = _mm256_loadu_pd(& _data_srcEdge[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d diffusivity_times_delta_dof_0 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d diffusivity_times_delta_dof_1 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d diffusivity_times_delta_dof_2 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d diffusivity_times_delta_dof_3 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d diffusivity_times_delta_dof_4 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d diffusivity_times_delta_dof_5 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d wx_dof_0 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d wx_dof_1 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d wx_dof_2 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d wx_dof_3 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 
1)*(ctr_1 + 2)) / (2))]); + const __m256d wx_dof_4 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d wx_dof_5 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d wy_dof_0 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d wy_dof_1 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d wy_dof_2 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d wy_dof_3 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d wy_dof_4 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d wy_dof_5 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_0_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_3 = 
_mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_8 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_7),tmp_qloop_8); + const __m256d tmp_qloop_10 = 
_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_11 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_12 = _mm256_mul_pd(tmp_qloop_11,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_12); + const __m256d tmp_qloop_14 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_15 = _mm256_mul_pd(tmp_qloop_14,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_16 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_15); + const __m256d tmp_qloop_17 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_14,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_8); + const __m256d tmp_qloop_18 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_11,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_10,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_7); + const __m256d tmp_qloop_19 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_10),tmp_qloop_12),tmp_qloop_15); + const __m256d tmp_qloop_20 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,wx_dof_3),_mm256_mul_pd(tmp_qloop_13,wx_dof_1)),_mm256_mul_pd(tmp_qloop_16,wx_dof_2)),_mm256_mul_pd(tmp_qloop_17,wx_dof_4)),_mm256_mul_pd(tmp_qloop_18,wx_dof_5)),_mm256_mul_pd(tmp_qloop_19,wx_dof_0)); + const __m256d tmp_qloop_21 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,wy_dof_3),_mm256_mul_pd(tmp_qloop_13,wy_dof_1)),_mm256_mul_pd(tmp_qloop_16,wy_dof_2)),_mm256_mul_pd(tmp_qloop_17,wy_dof_4)),_mm256_mul_pd(tmp_qloop_18,wy_dof_5)),_mm256_mul_pd(tmp_qloop_19,wy_dof_0)); + const __m256d tmp_qloop_22 = _mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(diffusivity_times_delta_dof_0,tmp_qloop_19),_mm256_mul_pd(diffusivity_times_delta_dof_1,tmp_qloop_13)),_mm256_mul_pd(diffusivity_times_delta_dof_2,tmp_qloop_16)),_mm256_mul_pd(diffusivity_times_delta_dof_3,tmp_qloop_10)),_mm256_mul_pd(diffusivity_times_delta_dof_4,tmp_qloop_17)),_mm256_mul_pd(diffusivity_times_delta_dof_5,tmp_qloop_18)),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)); + const __m256d tmp_qloop_23 = 
_mm256_mul_pd(tmp_qloop_22,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_9,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)))))); + const __m256d tmp_qloop_32 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_7); + const __m256d tmp_qloop_33 = _mm256_mul_pd(tmp_qloop_22,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_32),_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,tmp_qloop_32),_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)))); + const __m256d tmp_qloop_34 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_8); + const __m256d tmp_qloop_35 = _mm256_mul_pd(tmp_qloop_22,_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_20,tmp_qloop_34),_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,tmp_qloop_34),_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)))); + const __m256d tmp_qloop_36 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)); + const __m256d tmp_qloop_37 = 
_mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)); + const __m256d tmp_qloop_38 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)); + const __m256d tmp_qloop_39 = _mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)); + const __m256d tmp_qloop_40 = _mm256_mul_pd(tmp_qloop_22,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_add_pd(tmp_qloop_36,tmp_qloop_37)),_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(tmp_qloop_38,tmp_qloop_39)))); + const __m256d tmp_qloop_41 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_42 = _mm256_mul_pd(tmp_qloop_22,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_37,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_39,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_41,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)))))); + const __m256d tmp_qloop_43 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_8,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_44 = 
_mm256_mul_pd(tmp_qloop_22,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_20,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_36,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)))),_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_38,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_43,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)))))); + const __m256d q_tmp_0_0 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6)); + const __m256d q_tmp_0_1 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7)); + const __m256d q_tmp_0_2 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8)); + const __m256d q_tmp_0_3 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11)); + const __m256d q_tmp_0_4 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13)); + const __m256d q_tmp_0_5 = _mm256_mul_pd(tmp_qloop_23,_mm256_set_pd(tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14)); + const __m256d q_tmp_1_0 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6)); + const __m256d q_tmp_1_1 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7)); + const __m256d q_tmp_1_2 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8)); + const __m256d q_tmp_1_3 = 
_mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11)); + const __m256d q_tmp_1_4 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13)); + const __m256d q_tmp_1_5 = _mm256_mul_pd(tmp_qloop_33,_mm256_set_pd(tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14)); + const __m256d q_tmp_2_0 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6)); + const __m256d q_tmp_2_1 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7)); + const __m256d q_tmp_2_2 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8)); + const __m256d q_tmp_2_3 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11)); + const __m256d q_tmp_2_4 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13)); + const __m256d q_tmp_2_5 = _mm256_mul_pd(tmp_qloop_35,_mm256_set_pd(tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14)); + const __m256d q_tmp_3_0 = _mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6)); + const __m256d q_tmp_3_1 = _mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7)); + const __m256d q_tmp_3_2 = _mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8)); + const __m256d q_tmp_3_3 = 
_mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11)); + const __m256d q_tmp_3_4 = _mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13)); + const __m256d q_tmp_3_5 = _mm256_mul_pd(tmp_qloop_40,_mm256_set_pd(tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14)); + const __m256d q_tmp_4_0 = _mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6)); + const __m256d q_tmp_4_1 = _mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7)); + const __m256d q_tmp_4_2 = _mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8)); + const __m256d q_tmp_4_3 = _mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11)); + const __m256d q_tmp_4_4 = _mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13)); + const __m256d q_tmp_4_5 = _mm256_mul_pd(tmp_qloop_42,_mm256_set_pd(tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14)); + const __m256d q_tmp_5_0 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6)); + const __m256d q_tmp_5_1 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7)); + const __m256d q_tmp_5_2 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8)); + const __m256d q_tmp_5_3 = 
_mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11,tmp_moved_constant_11)); + const __m256d q_tmp_5_4 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13,tmp_moved_constant_13)); + const __m256d q_tmp_5_5 = _mm256_mul_pd(tmp_qloop_44,_mm256_set_pd(tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14,tmp_moved_constant_14)); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_0_1 = _mm256_add_pd(q_acc_0_1,q_tmp_0_1); + q_acc_0_2 = _mm256_add_pd(q_acc_0_2,q_tmp_0_2); + q_acc_0_3 = _mm256_add_pd(q_acc_0_3,q_tmp_0_3); + q_acc_0_4 = _mm256_add_pd(q_acc_0_4,q_tmp_0_4); + q_acc_0_5 = _mm256_add_pd(q_acc_0_5,q_tmp_0_5); + q_acc_1_0 = _mm256_add_pd(q_acc_1_0,q_tmp_1_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_1_2 = _mm256_add_pd(q_acc_1_2,q_tmp_1_2); + q_acc_1_3 = _mm256_add_pd(q_acc_1_3,q_tmp_1_3); + q_acc_1_4 = _mm256_add_pd(q_acc_1_4,q_tmp_1_4); + q_acc_1_5 = _mm256_add_pd(q_acc_1_5,q_tmp_1_5); + q_acc_2_0 = _mm256_add_pd(q_acc_2_0,q_tmp_2_0); + q_acc_2_1 = _mm256_add_pd(q_acc_2_1,q_tmp_2_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_2_3 = _mm256_add_pd(q_acc_2_3,q_tmp_2_3); + q_acc_2_4 = _mm256_add_pd(q_acc_2_4,q_tmp_2_4); + q_acc_2_5 = _mm256_add_pd(q_acc_2_5,q_tmp_2_5); + q_acc_3_0 = _mm256_add_pd(q_acc_3_0,q_tmp_3_0); + q_acc_3_1 = _mm256_add_pd(q_acc_3_1,q_tmp_3_1); + q_acc_3_2 = _mm256_add_pd(q_acc_3_2,q_tmp_3_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_3_4 = _mm256_add_pd(q_acc_3_4,q_tmp_3_4); + q_acc_3_5 = _mm256_add_pd(q_acc_3_5,q_tmp_3_5); + q_acc_4_0 = _mm256_add_pd(q_acc_4_0,q_tmp_4_0); + q_acc_4_1 = _mm256_add_pd(q_acc_4_1,q_tmp_4_1); + q_acc_4_2 = _mm256_add_pd(q_acc_4_2,q_tmp_4_2); + q_acc_4_3 = _mm256_add_pd(q_acc_4_3,q_tmp_4_3); + q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4); + q_acc_4_5 = _mm256_add_pd(q_acc_4_5,q_tmp_4_5); + q_acc_5_0 = 
_mm256_add_pd(q_acc_5_0,q_tmp_5_0); + q_acc_5_1 = _mm256_add_pd(q_acc_5_1,q_tmp_5_1); + q_acc_5_2 = _mm256_add_pd(q_acc_5_2,q_tmp_5_2); + q_acc_5_3 = _mm256_add_pd(q_acc_5_3,q_tmp_5_3); + q_acc_5_4 = _mm256_add_pd(q_acc_5_4,q_tmp_5_4); + q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5); + } + const __m256d elMatVec_0 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_0_0,src_dof_0),_mm256_mul_pd(q_acc_0_1,src_dof_1)),_mm256_mul_pd(q_acc_0_2,src_dof_2)),_mm256_mul_pd(q_acc_0_3,src_dof_3)),_mm256_mul_pd(q_acc_0_4,src_dof_4)),_mm256_mul_pd(q_acc_0_5,src_dof_5)); + const __m256d elMatVec_1 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_1_0,src_dof_0),_mm256_mul_pd(q_acc_1_1,src_dof_1)),_mm256_mul_pd(q_acc_1_2,src_dof_2)),_mm256_mul_pd(q_acc_1_3,src_dof_3)),_mm256_mul_pd(q_acc_1_4,src_dof_4)),_mm256_mul_pd(q_acc_1_5,src_dof_5)); + const __m256d elMatVec_2 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_2_0,src_dof_0),_mm256_mul_pd(q_acc_2_1,src_dof_1)),_mm256_mul_pd(q_acc_2_2,src_dof_2)),_mm256_mul_pd(q_acc_2_3,src_dof_3)),_mm256_mul_pd(q_acc_2_4,src_dof_4)),_mm256_mul_pd(q_acc_2_5,src_dof_5)); + const __m256d elMatVec_3 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_3_0,src_dof_0),_mm256_mul_pd(q_acc_3_1,src_dof_1)),_mm256_mul_pd(q_acc_3_2,src_dof_2)),_mm256_mul_pd(q_acc_3_3,src_dof_3)),_mm256_mul_pd(q_acc_3_4,src_dof_4)),_mm256_mul_pd(q_acc_3_5,src_dof_5)); + const __m256d elMatVec_4 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_4_0,src_dof_0),_mm256_mul_pd(q_acc_4_1,src_dof_1)),_mm256_mul_pd(q_acc_4_2,src_dof_2)),_mm256_mul_pd(q_acc_4_3,src_dof_3)),_mm256_mul_pd(q_acc_4_4,src_dof_4)),_mm256_mul_pd(q_acc_4_5,src_dof_5)); + const __m256d elMatVec_5 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(q_acc_5_0,src_dof_0),_mm256_mul_pd(q_acc_5_1,src_dof_1)),_mm256_mul_pd(q_acc_5_2,src_dof_2)),_mm256_mul_pd(q_acc_5_3,src_dof_3)),_mm256_mul_pd(q_acc_5_4,src_dof_4)),_mm256_mul_pd(q_acc_5_5,src_dof_5)); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_0,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_1,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatVec_2,_mm256_loadu_pd(& _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatVec_3,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatVec_4,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / 
(2))],_mm256_add_pd(elMatVec_5,_mm256_loadu_pd(& _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 
1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_4 = _data_srcEdge[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wx_dof_1 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t wx_dof_3 = _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 
1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wy_dof_1 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t wy_dof_3 = _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + 
real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_7 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_8 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_9 = tmp_qloop_7 + tmp_qloop_8 - 3.0; + const real_t tmp_qloop_10 = tmp_qloop_7*_data_q_p_1[q]; + const real_t tmp_qloop_11 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_12 = tmp_qloop_11*2.0; + const real_t tmp_qloop_13 = tmp_qloop_12 - _data_q_p_0[q]; + const real_t tmp_qloop_14 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_15 = tmp_qloop_14*2.0; + const real_t tmp_qloop_16 = tmp_qloop_15 - _data_q_p_1[q]; + const real_t tmp_qloop_17 = -tmp_qloop_10 + tmp_qloop_14*-4.0 + tmp_qloop_8; + const real_t tmp_qloop_18 = -tmp_qloop_10 + tmp_qloop_11*-4.0 + tmp_qloop_7; + const real_t tmp_qloop_19 = tmp_qloop_10 + tmp_qloop_12 + tmp_qloop_15 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_20 = tmp_qloop_10*wx_dof_3 + tmp_qloop_13*wx_dof_1 + tmp_qloop_16*wx_dof_2 + tmp_qloop_17*wx_dof_4 + tmp_qloop_18*wx_dof_5 + tmp_qloop_19*wx_dof_0; + const real_t tmp_qloop_21 = tmp_qloop_10*wy_dof_3 + tmp_qloop_13*wy_dof_1 + tmp_qloop_16*wy_dof_2 + tmp_qloop_17*wy_dof_4 + tmp_qloop_18*wy_dof_5 + tmp_qloop_19*wy_dof_0; + const real_t tmp_qloop_22 = abs_det_jac_affine_BLUE*(diffusivity_times_delta_dof_0*tmp_qloop_19 + diffusivity_times_delta_dof_1*tmp_qloop_13 + diffusivity_times_delta_dof_2*tmp_qloop_16 + diffusivity_times_delta_dof_3*tmp_qloop_10 + diffusivity_times_delta_dof_4*tmp_qloop_17 + diffusivity_times_delta_dof_5*tmp_qloop_18)*_data_q_w[q]; + const real_t tmp_qloop_23 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_0_0_BLUE*tmp_qloop_9 + jac_affine_inv_1_0_BLUE*tmp_qloop_9) + 
tmp_qloop_21*(jac_affine_inv_0_1_BLUE*tmp_qloop_9 + jac_affine_inv_1_1_BLUE*tmp_qloop_9)); + const real_t tmp_qloop_32 = tmp_qloop_7 - 1.0; + const real_t tmp_qloop_33 = tmp_qloop_22*(jac_affine_inv_0_0_BLUE*tmp_qloop_20*tmp_qloop_32 + jac_affine_inv_0_1_BLUE*tmp_qloop_21*tmp_qloop_32); + const real_t tmp_qloop_34 = tmp_qloop_8 - 1.0; + const real_t tmp_qloop_35 = tmp_qloop_22*(jac_affine_inv_1_0_BLUE*tmp_qloop_20*tmp_qloop_34 + jac_affine_inv_1_1_BLUE*tmp_qloop_21*tmp_qloop_34); + const real_t tmp_qloop_36 = jac_affine_inv_1_0_BLUE*tmp_qloop_7; + const real_t tmp_qloop_37 = jac_affine_inv_0_0_BLUE*tmp_qloop_8; + const real_t tmp_qloop_38 = jac_affine_inv_1_1_BLUE*tmp_qloop_7; + const real_t tmp_qloop_39 = jac_affine_inv_0_1_BLUE*tmp_qloop_8; + const real_t tmp_qloop_40 = tmp_qloop_22*(tmp_qloop_20*(tmp_qloop_36 + tmp_qloop_37) + tmp_qloop_21*(tmp_qloop_38 + tmp_qloop_39)); + const real_t tmp_qloop_41 = -tmp_qloop_7 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_42 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_1_0_BLUE*tmp_qloop_41 - tmp_qloop_37) + tmp_qloop_21*(jac_affine_inv_1_1_BLUE*tmp_qloop_41 - tmp_qloop_39)); + const real_t tmp_qloop_43 = -tmp_qloop_8 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_44 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_0_0_BLUE*tmp_qloop_43 - tmp_qloop_36) + tmp_qloop_21*(jac_affine_inv_0_1_BLUE*tmp_qloop_43 - tmp_qloop_38)); + const real_t q_tmp_0_0 = tmp_moved_constant_6*tmp_qloop_23; + const real_t q_tmp_0_1 = tmp_moved_constant_7*tmp_qloop_23; + const real_t q_tmp_0_2 = tmp_moved_constant_8*tmp_qloop_23; + const real_t q_tmp_0_3 = tmp_moved_constant_11*tmp_qloop_23; + const real_t q_tmp_0_4 = tmp_moved_constant_13*tmp_qloop_23; + const real_t q_tmp_0_5 = tmp_moved_constant_14*tmp_qloop_23; + const real_t q_tmp_1_0 = tmp_moved_constant_6*tmp_qloop_33; + const real_t q_tmp_1_1 = tmp_moved_constant_7*tmp_qloop_33; + const real_t q_tmp_1_2 = tmp_moved_constant_8*tmp_qloop_33; + const real_t q_tmp_1_3 = 
tmp_moved_constant_11*tmp_qloop_33; + const real_t q_tmp_1_4 = tmp_moved_constant_13*tmp_qloop_33; + const real_t q_tmp_1_5 = tmp_moved_constant_14*tmp_qloop_33; + const real_t q_tmp_2_0 = tmp_moved_constant_6*tmp_qloop_35; + const real_t q_tmp_2_1 = tmp_moved_constant_7*tmp_qloop_35; + const real_t q_tmp_2_2 = tmp_moved_constant_8*tmp_qloop_35; + const real_t q_tmp_2_3 = tmp_moved_constant_11*tmp_qloop_35; + const real_t q_tmp_2_4 = tmp_moved_constant_13*tmp_qloop_35; + const real_t q_tmp_2_5 = tmp_moved_constant_14*tmp_qloop_35; + const real_t q_tmp_3_0 = tmp_moved_constant_6*tmp_qloop_40; + const real_t q_tmp_3_1 = tmp_moved_constant_7*tmp_qloop_40; + const real_t q_tmp_3_2 = tmp_moved_constant_8*tmp_qloop_40; + const real_t q_tmp_3_3 = tmp_moved_constant_11*tmp_qloop_40; + const real_t q_tmp_3_4 = tmp_moved_constant_13*tmp_qloop_40; + const real_t q_tmp_3_5 = tmp_moved_constant_14*tmp_qloop_40; + const real_t q_tmp_4_0 = tmp_moved_constant_6*tmp_qloop_42; + const real_t q_tmp_4_1 = tmp_moved_constant_7*tmp_qloop_42; + const real_t q_tmp_4_2 = tmp_moved_constant_8*tmp_qloop_42; + const real_t q_tmp_4_3 = tmp_moved_constant_11*tmp_qloop_42; + const real_t q_tmp_4_4 = tmp_moved_constant_13*tmp_qloop_42; + const real_t q_tmp_4_5 = tmp_moved_constant_14*tmp_qloop_42; + const real_t q_tmp_5_0 = tmp_moved_constant_6*tmp_qloop_44; + const real_t q_tmp_5_1 = tmp_moved_constant_7*tmp_qloop_44; + const real_t q_tmp_5_2 = tmp_moved_constant_8*tmp_qloop_44; + const real_t q_tmp_5_3 = tmp_moved_constant_11*tmp_qloop_44; + const real_t q_tmp_5_4 = tmp_moved_constant_13*tmp_qloop_44; + const real_t q_tmp_5_5 = tmp_moved_constant_14*tmp_qloop_44; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + 
q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5; + const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5; + const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5; + _data_dstVertex[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + } + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/supg_diffusion/avx/P2ElementwiseSupgDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusion_macro_2D.cpp b/operators/supg_diffusion/avx/P2ElementwiseSupgDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusion_macro_2D.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..8487d9b0ad3dd713264c18c9f12521aa93a867ee --- /dev/null +++ b/operators/supg_diffusion/avx/P2ElementwiseSupgDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusion_macro_2D.cpp @@ -0,0 +1,534 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. 
+*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseSupgDiffusion.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseSupgDiffusion::computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusion_macro_2D( real_t * RESTRICT _data_diffusivity_times_deltaEdge, real_t * RESTRICT _data_diffusivity_times_deltaVertex, real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_wxEdge, real_t * RESTRICT _data_wxVertex, real_t * RESTRICT _data_wyEdge, real_t * RESTRICT _data_wyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t 
p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + const real_t tmp_qloop_15 = jac_affine_inv_0_0_GRAY*4.0; + const real_t tmp_qloop_16 = jac_affine_inv_1_0_GRAY*4.0; + const real_t tmp_qloop_17 = tmp_qloop_15 + tmp_qloop_16; + const real_t tmp_qloop_18 = jac_affine_inv_0_1_GRAY*4.0; + const real_t tmp_qloop_19 = jac_affine_inv_1_1_GRAY*4.0; + const real_t tmp_qloop_20 = tmp_qloop_18 + tmp_qloop_19; + const real_t tmp_qloop_28 = jac_affine_inv_1_0_GRAY*8.0; + const real_t tmp_qloop_29 = jac_affine_inv_1_1_GRAY*8.0; + const real_t tmp_qloop_31 = jac_affine_inv_1_0_GRAY*tmp_qloop_15 + jac_affine_inv_1_1_GRAY*tmp_qloop_18; + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + 
_data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - 
phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d diffusivity_times_delta_dof_0 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d diffusivity_times_delta_dof_1 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d diffusivity_times_delta_dof_2 = _mm256_loadu_pd(& 
_data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d diffusivity_times_delta_dof_3 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d diffusivity_times_delta_dof_4 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d diffusivity_times_delta_dof_5 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d wx_dof_0 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d wx_dof_1 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d wx_dof_2 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d wx_dof_3 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d wx_dof_4 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d wx_dof_5 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d wy_dof_0 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]); + const __m256d wy_dof_1 = _mm256_loadu_pd(& 
_data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d wy_dof_2 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d wy_dof_3 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d wy_dof_4 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d wy_dof_5 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_1 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_0),tmp_qloop_1); + const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_4 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_5 = 
_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_5); + const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_8); + const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_1); + const __m256d tmp_qloop_11 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_0); + const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_3),tmp_qloop_5),tmp_qloop_8); + const __m256d tmp_qloop_13 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,wx_dof_4),_mm256_mul_pd(tmp_qloop_11,wx_dof_5)),_mm256_mul_pd(tmp_qloop_12,wx_dof_0)),_mm256_mul_pd(tmp_qloop_3,wx_dof_3)),_mm256_mul_pd(tmp_qloop_6,wx_dof_1)),_mm256_mul_pd(tmp_qloop_9,wx_dof_2)); + const __m256d tmp_qloop_14 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,wy_dof_4),_mm256_mul_pd(tmp_qloop_11,wy_dof_5)),_mm256_mul_pd(tmp_qloop_12,wy_dof_0)),_mm256_mul_pd(tmp_qloop_3,wy_dof_3)),_mm256_mul_pd(tmp_qloop_6,wy_dof_1)),_mm256_mul_pd(tmp_qloop_9,wy_dof_2)); + const __m256d tmp_qloop_21 = _mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(diffusivity_times_delta_dof_0,tmp_qloop_12),_mm256_mul_pd(diffusivity_times_delta_dof_1,tmp_qloop_6)),_mm256_mul_pd(diffusivity_times_delta_dof_2,tmp_qloop_9)),_mm256_mul_pd(diffusivity_times_delta_dof_3,tmp_qloop_3)),_mm256_mul_pd(diffusivity_times_delta_dof_4,tmp_qloop_10)),_mm256_mul_pd(diffusivity_times_delta_dof_5,tmp_qloop_11)),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY,abs_det_jac_affine_GRAY)); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_0); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_1); + const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)); + const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)); + const __m256d tmp_qloop_26 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)); + const __m256d tmp_qloop_27 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)); + const __m256d tmp_qloop_30 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_32 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d q_tmp_0_0 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY),_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20))),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY),_mm256_set_pd(tmp_qloop_17,tmp_qloop_17,tmp_qloop_17,tmp_qloop_17))),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY),
_mm256_set_pd(tmp_qloop_20,tmp_qloop_20,tmp_qloop_20,tmp_qloop_20)))); + const __m256d q_tmp_1_1 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY),_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY))),_mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY),_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))))),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_22),_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_22),_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)))); + const __m256d q_tmp_2_2 = 
_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY),_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY))),_mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY),_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))))),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_23),_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_23),_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)))); + const __m256d q_tmp_3_3 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(tmp_qloop_24,tmp_qloop_25)),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(tmp_qloop_26,tmp_qloop_27)))),_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY),_mm256_set_pd(tmp_qloop_28,tmp_qloop_28,tmp_qloop_28,tmp_qloop_28)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY),_mm256_set_pd(tmp_qloop_29,tmp_qloop_29,tmp_qloop_29,tmp_qloop_29)))); + const __m256d q_tmp_4_4 = 
_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY)))))),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_15,tmp_qloop_15,tmp_qloop_15,tmp_qloop_15)),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_28,tmp_qloop_28,tmp_qloop_28,tmp_qloop_28))),_mm256_set_pd(jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY,jac_affine_inv_1_0_GRAY)),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_18,tmp_qloop_18,tmp_qloop_18,tmp_qloop_18)),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_29,tmp_qloop_29,tmp_qloop_29,tmp_qloop_29))),_mm256_set_pd(jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY,jac_affine_inv_1_1_GRAY))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_31,tmp_qloop_31,tmp_qloop_31,tmp_qloop_31)))); + const __m256d q_tmp_5_5 = 
_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY)))))),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_16,tmp_qloop_16,tmp_qloop_16,tmp_qloop_16)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY))),_mm256_set_pd(jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY,jac_affine_inv_0_0_GRAY)),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_19,tmp_qloop_19,tmp_qloop_19,tmp_qloop_19)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))),_mm256_set_pd(jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY,jac_affine_inv_0_1_GRAY))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_qloop_31,tmp_qloop_31,tmp_qloop_31,tmp_qloop_31)))); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4); + q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5); + } + const __m256d elMatDiag_0 = q_acc_0_0; + const __m256d elMatDiag_1 = q_acc_1_1; + const __m256d elMatDiag_2 = q_acc_2_2; + const __m256d elMatDiag_3 = 
q_acc_3_3; + const __m256d elMatDiag_4 = q_acc_4_4; + const __m256d elMatDiag_5 = q_acc_5_5; + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / 
(2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 
1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 
1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wx_dof_1 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_3 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wy_dof_1 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_3 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - 
((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0; + const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q]; + const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_8 = tmp_qloop_7*2.0; + const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0; + const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0; + const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_13 = tmp_qloop_10*wx_dof_4 + tmp_qloop_11*wx_dof_5 + tmp_qloop_12*wx_dof_0 + tmp_qloop_3*wx_dof_3 + tmp_qloop_6*wx_dof_1 + tmp_qloop_9*wx_dof_2; + const real_t tmp_qloop_14 = tmp_qloop_10*wy_dof_4 + tmp_qloop_11*wy_dof_5 + tmp_qloop_12*wy_dof_0 + tmp_qloop_3*wy_dof_3 + tmp_qloop_6*wy_dof_1 + tmp_qloop_9*wy_dof_2; + const real_t tmp_qloop_21 = abs_det_jac_affine_GRAY*(diffusivity_times_delta_dof_0*tmp_qloop_12 + diffusivity_times_delta_dof_1*tmp_qloop_6 + diffusivity_times_delta_dof_2*tmp_qloop_9 + diffusivity_times_delta_dof_3*tmp_qloop_3 + diffusivity_times_delta_dof_4*tmp_qloop_10 + diffusivity_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q]; + const real_t tmp_qloop_22 = tmp_qloop_0 - 1.0; + const real_t tmp_qloop_23 = tmp_qloop_1 - 1.0; + const real_t tmp_qloop_24 = jac_affine_inv_1_0_GRAY*tmp_qloop_0; + const real_t tmp_qloop_25 = jac_affine_inv_0_0_GRAY*tmp_qloop_1; + const real_t tmp_qloop_26 = 
jac_affine_inv_1_1_GRAY*tmp_qloop_0; + const real_t tmp_qloop_27 = jac_affine_inv_0_1_GRAY*tmp_qloop_1; + const real_t tmp_qloop_30 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_32 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0; + const real_t q_tmp_0_0 = tmp_qloop_21*(tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_0_GRAY*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_2))*(jac_affine_inv_0_0_GRAY*tmp_qloop_17 + jac_affine_inv_0_1_GRAY*tmp_qloop_20 + jac_affine_inv_1_0_GRAY*tmp_qloop_17 + jac_affine_inv_1_1_GRAY*tmp_qloop_20); + const real_t q_tmp_1_1 = tmp_qloop_21*((jac_affine_inv_0_0_GRAY*jac_affine_inv_0_0_GRAY)*4.0 + (jac_affine_inv_0_1_GRAY*jac_affine_inv_0_1_GRAY)*4.0)*(jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_22 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_22); + const real_t q_tmp_2_2 = tmp_qloop_21*((jac_affine_inv_1_0_GRAY*jac_affine_inv_1_0_GRAY)*4.0 + (jac_affine_inv_1_1_GRAY*jac_affine_inv_1_1_GRAY)*4.0)*(jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_23 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_23); + const real_t q_tmp_3_3 = tmp_qloop_21*(jac_affine_inv_0_0_GRAY*tmp_qloop_28 + jac_affine_inv_0_1_GRAY*tmp_qloop_29)*(tmp_qloop_13*(tmp_qloop_24 + tmp_qloop_25) + tmp_qloop_14*(tmp_qloop_26 + tmp_qloop_27)); + const real_t q_tmp_4_4 = tmp_qloop_21*(tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_30 - tmp_qloop_25) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_30 - tmp_qloop_27))*(jac_affine_inv_1_0_GRAY*(-tmp_qloop_15 - tmp_qloop_28) + jac_affine_inv_1_1_GRAY*(-tmp_qloop_18 - tmp_qloop_29) - tmp_qloop_31); + const real_t q_tmp_5_5 = tmp_qloop_21*(tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_32 - tmp_qloop_24) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_32 - tmp_qloop_26))*(jac_affine_inv_0_0_GRAY*(jac_affine_inv_0_0_GRAY*-8.0 - tmp_qloop_16) + jac_affine_inv_0_1_GRAY*(jac_affine_inv_0_1_GRAY*-8.0 - tmp_qloop_19) - tmp_qloop_31); + 
q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatDiag_0 = q_acc_0_0; + const real_t elMatDiag_1 = q_acc_1_1; + const real_t elMatDiag_2 = q_acc_2_2; + const real_t elMatDiag_3 = q_acc_3_3; + const real_t elMatDiag_4 = q_acc_4_4; + const real_t elMatDiag_5 = q_acc_5_5; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + 
_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + } + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t 
jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + const real_t tmp_moved_constant_0 = jac_affine_inv_0_0_BLUE*4.0; + const real_t tmp_moved_constant_1 = jac_affine_inv_1_0_BLUE*4.0; + const real_t tmp_moved_constant_2 = tmp_moved_constant_0 + tmp_moved_constant_1; + const real_t tmp_moved_constant_3 = jac_affine_inv_0_1_BLUE*4.0; + const real_t tmp_moved_constant_4 = jac_affine_inv_1_1_BLUE*4.0; + const real_t tmp_moved_constant_5 = tmp_moved_constant_3 + tmp_moved_constant_4; + const real_t tmp_moved_constant_6 = jac_affine_inv_1_0_BLUE*8.0; + const real_t tmp_moved_constant_7 = jac_affine_inv_1_1_BLUE*8.0; + const real_t tmp_moved_constant_8 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_0 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_3; + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + { + for (int64_t ctr_0 = 0; ctr_0 < (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 += 4) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const __m256d p_affine_0_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_0_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_1_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_1_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d p_affine_2_0 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0,macro_vertex_coord_id_1comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)),_mm256_set_pd(macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0,macro_vertex_coord_id_2comp0)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0,macro_vertex_coord_id_0comp0)); + const __m256d p_affine_2_1 = 
_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1,macro_vertex_coord_id_1comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float))),_mm256_mul_pd(_mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)),_mm256_set_pd(macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1,macro_vertex_coord_id_2comp1)),_mm256_add_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_loadu_pd(& _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),_mm256_div_pd(_mm256_set_pd(1.0,1.0,1.0,1.0),_mm256_set_pd(micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float,micro_edges_per_macro_edge_float)))),_mm256_set_pd(macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1,macro_vertex_coord_id_0comp1)); + const __m256d diffusivity_times_delta_dof_0 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d diffusivity_times_delta_dof_1 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d 
diffusivity_times_delta_dof_2 = _mm256_loadu_pd(& _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d diffusivity_times_delta_dof_3 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d diffusivity_times_delta_dof_4 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d diffusivity_times_delta_dof_5 = _mm256_loadu_pd(& _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d wx_dof_0 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d wx_dof_1 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d wx_dof_2 = _mm256_loadu_pd(& _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d wx_dof_3 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d wx_dof_4 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d wx_dof_5 = _mm256_loadu_pd(& _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + const __m256d wy_dof_0 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]); + const __m256d wy_dof_1 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d wy_dof_2 = _mm256_loadu_pd(& _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]); + const __m256d wy_dof_3 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]); + const __m256d wy_dof_4 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]); + const __m256d wy_dof_5 = _mm256_loadu_pd(& _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]); + __m256d q_acc_0_0 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_1_1 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_2_2 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_3_3 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_4_4 = _mm256_set_pd(0.0,0.0,0.0,0.0); + __m256d q_acc_5_5 = _mm256_set_pd(0.0,0.0,0.0,0.0); + for (int64_t q = 0; q < 4; q += 1) + { + const __m256d tmp_qloop_0 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_1 = _mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_2 = _mm256_add_pd(_mm256_add_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),tmp_qloop_0),tmp_qloop_1); + const __m256d tmp_qloop_3 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_4 = 
_mm256_mul_pd(_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q]),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])); + const __m256d tmp_qloop_5 = _mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_6 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),tmp_qloop_5); + const __m256d tmp_qloop_7 = _mm256_mul_pd(_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])); + const __m256d tmp_qloop_8 = _mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(2.0,2.0,2.0,2.0)); + const __m256d tmp_qloop_9 = _mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),tmp_qloop_8); + const __m256d tmp_qloop_10 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_7,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_1); + const __m256d tmp_qloop_11 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_4,_mm256_set_pd(-4.0,-4.0,-4.0,-4.0)),_mm256_mul_pd(tmp_qloop_3,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),tmp_qloop_0); + const __m256d tmp_qloop_12 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(_mm256_set_pd(-3.0,-3.0,-3.0,-3.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q]))),_mm256_set_pd(1.0,1.0,1.0,1.0)),tmp_qloop_3),tmp_qloop_5),tmp_qloop_8); + const __m256d tmp_qloop_13 = 
_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,wx_dof_4),_mm256_mul_pd(tmp_qloop_11,wx_dof_5)),_mm256_mul_pd(tmp_qloop_12,wx_dof_0)),_mm256_mul_pd(tmp_qloop_3,wx_dof_3)),_mm256_mul_pd(tmp_qloop_6,wx_dof_1)),_mm256_mul_pd(tmp_qloop_9,wx_dof_2)); + const __m256d tmp_qloop_14 = _mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(tmp_qloop_10,wy_dof_4),_mm256_mul_pd(tmp_qloop_11,wy_dof_5)),_mm256_mul_pd(tmp_qloop_12,wy_dof_0)),_mm256_mul_pd(tmp_qloop_3,wy_dof_3)),_mm256_mul_pd(tmp_qloop_6,wy_dof_1)),_mm256_mul_pd(tmp_qloop_9,wy_dof_2)); + const __m256d tmp_qloop_21 = _mm256_mul_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(diffusivity_times_delta_dof_0,tmp_qloop_12),_mm256_mul_pd(diffusivity_times_delta_dof_1,tmp_qloop_6)),_mm256_mul_pd(diffusivity_times_delta_dof_2,tmp_qloop_9)),_mm256_mul_pd(diffusivity_times_delta_dof_3,tmp_qloop_3)),_mm256_mul_pd(diffusivity_times_delta_dof_4,tmp_qloop_10)),_mm256_mul_pd(diffusivity_times_delta_dof_5,tmp_qloop_11)),_mm256_set_pd(_data_q_w[q],_data_q_w[q],_data_q_w[q],_data_q_w[q])),_mm256_set_pd(abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE,abs_det_jac_affine_BLUE)); + const __m256d tmp_qloop_22 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_0); + const __m256d tmp_qloop_23 = _mm256_add_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),tmp_qloop_1); + const __m256d tmp_qloop_24 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)); + const __m256d tmp_qloop_25 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)); + const __m256d tmp_qloop_26 = _mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)); + const __m256d 
tmp_qloop_27 = _mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)); + const __m256d tmp_qloop_30 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q],_data_q_p_1[q])),_mm256_mul_pd(tmp_qloop_0,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d tmp_qloop_32 = _mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q],_data_q_p_0[q])),_mm256_mul_pd(tmp_qloop_1,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0))),_mm256_set_pd(4.0,4.0,4.0,4.0)); + const __m256d q_tmp_0_0 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)),_mm256_mul_pd(tmp_qloop_2,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)))))),_mm256_add_pd(_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE),_mm256_set_pd(tmp_moved_constant_2,tmp_moved_constant_2,tmp_moved_constant_2,tmp_moved_constant_2)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE),_mm256_set_pd(tmp_moved_constant_5,tmp_moved_constant_5,tmp_moved_constant_5,tmp_moved_constant_5))),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0
_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE),_mm256_set_pd(tmp_moved_constant_2,tmp_moved_constant_2,tmp_moved_constant_2,tmp_moved_constant_2))),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE),_mm256_set_pd(tmp_moved_constant_5,tmp_moved_constant_5,tmp_moved_constant_5,tmp_moved_constant_5)))); + const __m256d q_tmp_1_1 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE),_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE))),_mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE),_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))))),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_22),_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_22),_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)))); + const __m256d q_tmp_2_2 = 
_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE),_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE))),_mm256_mul_pd(_mm256_set_pd(4.0,4.0,4.0,4.0),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE),_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))))),_mm256_add_pd(_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_13,tmp_qloop_23),_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_14,tmp_qloop_23),_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)))); + const __m256d q_tmp_3_3 = _mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(tmp_qloop_24,tmp_qloop_25)),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(tmp_qloop_26,tmp_qloop_27)))),_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE),_mm256_set_pd(tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6)),_mm256_mul_pd(_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE),_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7)))); + const __m256d q_tmp_4_4 = 
_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_25,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_27,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_30,_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE)))))),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_0,tmp_moved_constant_0,tmp_moved_constant_0,tmp_moved_constant_0)),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6,tmp_moved_constant_6))),_mm256_set_pd(jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE,jac_affine_inv_1_0_BLUE)),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3,tmp_moved_constant_3)),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7,tmp_moved_constant_7))),_mm256_set_pd(jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE,jac_affine_inv_1_1_BLUE))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8)))); + const __m256d q_tmp_5_5 = 
_mm256_mul_pd(_mm256_mul_pd(tmp_qloop_21,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_13,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_24,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)))),_mm256_mul_pd(tmp_qloop_14,_mm256_add_pd(_mm256_mul_pd(tmp_qloop_26,_mm256_set_pd(-1.0,-1.0,-1.0,-1.0)),_mm256_mul_pd(tmp_qloop_32,_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE)))))),_mm256_add_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_1,tmp_moved_constant_1,tmp_moved_constant_1,tmp_moved_constant_1)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE))),_mm256_set_pd(jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE,jac_affine_inv_0_0_BLUE)),_mm256_mul_pd(_mm256_add_pd(_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_4,tmp_moved_constant_4,tmp_moved_constant_4,tmp_moved_constant_4)),_mm256_mul_pd(_mm256_set_pd(-8.0,-8.0,-8.0,-8.0),_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))),_mm256_set_pd(jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE,jac_affine_inv_0_1_BLUE))),_mm256_mul_pd(_mm256_set_pd(-1.0,-1.0,-1.0,-1.0),_mm256_set_pd(tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8,tmp_moved_constant_8)))); + q_acc_0_0 = _mm256_add_pd(q_acc_0_0,q_tmp_0_0); + q_acc_1_1 = _mm256_add_pd(q_acc_1_1,q_tmp_1_1); + q_acc_2_2 = _mm256_add_pd(q_acc_2_2,q_tmp_2_2); + q_acc_3_3 = _mm256_add_pd(q_acc_3_3,q_tmp_3_3); + q_acc_4_4 = _mm256_add_pd(q_acc_4_4,q_tmp_4_4); + q_acc_5_5 = _mm256_add_pd(q_acc_5_5,q_tmp_5_5); + } + const __m256d elMatDiag_0 = q_acc_0_0; + const __m256d 
elMatDiag_1 = q_acc_1_1; + const __m256d elMatDiag_2 = q_acc_2_2; + const __m256d elMatDiag_3 = q_acc_3_3; + const __m256d elMatDiag_4 = q_acc_4_4; + const __m256d elMatDiag_5 = q_acc_5_5; + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_0,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_1,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1],_mm256_add_pd(elMatDiag_2,_mm256_loadu_pd(& _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))],_mm256_add_pd(elMatDiag_3,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1],_mm256_add_pd(elMatDiag_4,_mm256_loadu_pd(& _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]))); + _mm256_storeu_pd(&_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))],_mm256_add_pd(elMatDiag_5,_mm256_loadu_pd(& 
_data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]))); + } + for (int64_t ctr_0 = (int64_t)((-ctr_1 + micro_edges_per_macro_edge - 1) / (4)) * (4); ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / 
(micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 
1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wx_dof_1 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t wx_dof_3 = _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wy_dof_1 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t wy_dof_3 = _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_4 = 
_data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0; + const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q]; + const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_8 = tmp_qloop_7*2.0; + const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0; + const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0; + const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_13 = tmp_qloop_10*wx_dof_4 + tmp_qloop_11*wx_dof_5 + tmp_qloop_12*wx_dof_0 + tmp_qloop_3*wx_dof_3 + tmp_qloop_6*wx_dof_1 + tmp_qloop_9*wx_dof_2; + const real_t tmp_qloop_14 = tmp_qloop_10*wy_dof_4 + tmp_qloop_11*wy_dof_5 + tmp_qloop_12*wy_dof_0 + tmp_qloop_3*wy_dof_3 + tmp_qloop_6*wy_dof_1 + tmp_qloop_9*wy_dof_2; + const real_t tmp_qloop_21 = abs_det_jac_affine_BLUE*(diffusivity_times_delta_dof_0*tmp_qloop_12 + diffusivity_times_delta_dof_1*tmp_qloop_6 + diffusivity_times_delta_dof_2*tmp_qloop_9 + diffusivity_times_delta_dof_3*tmp_qloop_3 + diffusivity_times_delta_dof_4*tmp_qloop_10 + 
diffusivity_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q]; + const real_t tmp_qloop_22 = tmp_qloop_0 - 1.0; + const real_t tmp_qloop_23 = tmp_qloop_1 - 1.0; + const real_t tmp_qloop_24 = jac_affine_inv_1_0_BLUE*tmp_qloop_0; + const real_t tmp_qloop_25 = jac_affine_inv_0_0_BLUE*tmp_qloop_1; + const real_t tmp_qloop_26 = jac_affine_inv_1_1_BLUE*tmp_qloop_0; + const real_t tmp_qloop_27 = jac_affine_inv_0_1_BLUE*tmp_qloop_1; + const real_t tmp_qloop_30 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_32 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0; + const real_t q_tmp_0_0 = tmp_qloop_21*(tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_2 + jac_affine_inv_1_0_BLUE*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_2 + jac_affine_inv_1_1_BLUE*tmp_qloop_2))*(jac_affine_inv_0_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_0_1_BLUE*tmp_moved_constant_5 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_5); + const real_t q_tmp_1_1 = tmp_qloop_21*((jac_affine_inv_0_0_BLUE*jac_affine_inv_0_0_BLUE)*4.0 + (jac_affine_inv_0_1_BLUE*jac_affine_inv_0_1_BLUE)*4.0)*(jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_22 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_22); + const real_t q_tmp_2_2 = tmp_qloop_21*((jac_affine_inv_1_0_BLUE*jac_affine_inv_1_0_BLUE)*4.0 + (jac_affine_inv_1_1_BLUE*jac_affine_inv_1_1_BLUE)*4.0)*(jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_23 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_23); + const real_t q_tmp_3_3 = tmp_qloop_21*(jac_affine_inv_0_0_BLUE*tmp_moved_constant_6 + jac_affine_inv_0_1_BLUE*tmp_moved_constant_7)*(tmp_qloop_13*(tmp_qloop_24 + tmp_qloop_25) + tmp_qloop_14*(tmp_qloop_26 + tmp_qloop_27)); + const real_t q_tmp_4_4 = tmp_qloop_21*(tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_30 - tmp_qloop_25) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_30 - tmp_qloop_27))*(jac_affine_inv_1_0_BLUE*(-tmp_moved_constant_0 - tmp_moved_constant_6) + 
jac_affine_inv_1_1_BLUE*(-tmp_moved_constant_3 - tmp_moved_constant_7) - tmp_moved_constant_8); + const real_t q_tmp_5_5 = tmp_qloop_21*(tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_32 - tmp_qloop_24) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_32 - tmp_qloop_26))*(jac_affine_inv_0_0_BLUE*(jac_affine_inv_0_0_BLUE*-8.0 - tmp_moved_constant_1) + jac_affine_inv_0_1_BLUE*(jac_affine_inv_0_1_BLUE*-8.0 - tmp_moved_constant_4) - tmp_moved_constant_8); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatDiag_0 = q_acc_0_0; + const real_t elMatDiag_1 = q_acc_1_1; + const real_t elMatDiag_2 = q_acc_2_2; + const real_t elMatDiag_3 = q_acc_3_3; + const real_t elMatDiag_4 = q_acc_4_4; + const real_t elMatDiag_5 = q_acc_5_5; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 
2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + } + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusionAnnulusMap_apply_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp b/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusionAnnulusMap_apply_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e916dd3df26a9612c30ab78cea0fae817c0a5c03 --- /dev/null +++ b/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusionAnnulusMap_apply_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp @@ -0,0 +1,767 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. 
If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseSupgDiffusionAnnulusMap.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseSupgDiffusionAnnulusMap::apply_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D( real_t * RESTRICT _data_diffusivity_times_deltaEdge, real_t * RESTRICT _data_diffusivity_times_deltaVertex, real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_wxEdge, real_t * RESTRICT _data_wxVertex, real_t * RESTRICT _data_wyEdge, real_t * RESTRICT _data_wyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + 
tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; + const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_8 = -tmp_qloop_7; + const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); + const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; + const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_11*1.0; + const real_t tmp_qloop_37 = 
jac_affine_inv_0_0_GRAY*4.0; + const real_t tmp_qloop_38 = jac_affine_inv_1_0_GRAY*4.0; + const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38; + const real_t tmp_qloop_40 = jac_affine_inv_0_0_GRAY*tmp_qloop_39 + jac_affine_inv_1_0_GRAY*tmp_qloop_39; + const real_t tmp_qloop_41 = jac_affine_inv_0_1_GRAY*4.0; + const real_t tmp_qloop_42 = jac_affine_inv_1_1_GRAY*4.0; + const real_t tmp_qloop_43 = tmp_qloop_41 + tmp_qloop_42; + const real_t tmp_qloop_44 = jac_affine_inv_0_0_GRAY*tmp_qloop_43 + jac_affine_inv_1_0_GRAY*tmp_qloop_43; + const real_t tmp_qloop_45 = jac_affine_inv_0_1_GRAY*tmp_qloop_39 + jac_affine_inv_1_1_GRAY*tmp_qloop_39; + const real_t tmp_qloop_46 = jac_affine_inv_0_1_GRAY*tmp_qloop_43 + jac_affine_inv_1_1_GRAY*tmp_qloop_43; + const real_t tmp_qloop_83 = jac_affine_inv_0_1_GRAY*tmp_qloop_37; + const real_t tmp_qloop_84 = (jac_affine_inv_0_0_GRAY*jac_affine_inv_0_0_GRAY)*4.0; + const real_t tmp_qloop_85 = (jac_affine_inv_0_1_GRAY*jac_affine_inv_0_1_GRAY)*4.0; + const real_t tmp_qloop_90 = jac_affine_inv_1_1_GRAY*tmp_qloop_38; + const real_t tmp_qloop_91 = (jac_affine_inv_1_0_GRAY*jac_affine_inv_1_0_GRAY)*4.0; + const real_t tmp_qloop_92 = (jac_affine_inv_1_1_GRAY*jac_affine_inv_1_1_GRAY)*4.0; + const real_t tmp_qloop_97 = jac_affine_inv_1_0_GRAY*8.0; + const real_t tmp_qloop_98 = jac_affine_inv_0_0_GRAY*tmp_qloop_97; + const real_t tmp_qloop_99 = jac_affine_inv_1_1_GRAY*tmp_qloop_37; + const real_t tmp_qloop_100 = jac_affine_inv_0_1_GRAY*tmp_qloop_38; + const real_t tmp_qloop_101 = tmp_qloop_100 + tmp_qloop_99; + const real_t tmp_qloop_102 = jac_affine_inv_1_1_GRAY*8.0; + const real_t tmp_qloop_103 = jac_affine_inv_0_1_GRAY*tmp_qloop_102; + const real_t tmp_qloop_111 = jac_affine_inv_1_0_GRAY*tmp_qloop_37; + const real_t tmp_qloop_112 = -tmp_qloop_37 - tmp_qloop_97; + const real_t tmp_qloop_113 = jac_affine_inv_1_0_GRAY*tmp_qloop_112 - tmp_qloop_111; + const real_t tmp_qloop_114 = -tmp_qloop_102 - tmp_qloop_41; + const real_t tmp_qloop_115 = 
jac_affine_inv_1_0_GRAY*tmp_qloop_114 - tmp_qloop_99; + const real_t tmp_qloop_116 = jac_affine_inv_1_1_GRAY*tmp_qloop_112 - tmp_qloop_100; + const real_t tmp_qloop_117 = jac_affine_inv_1_1_GRAY*tmp_qloop_41; + const real_t tmp_qloop_118 = jac_affine_inv_1_1_GRAY*tmp_qloop_114 - tmp_qloop_117; + const real_t tmp_qloop_123 = jac_affine_inv_0_0_GRAY*-8.0 - tmp_qloop_38; + const real_t tmp_qloop_124 = jac_affine_inv_0_0_GRAY*tmp_qloop_123 - tmp_qloop_111; + const real_t tmp_qloop_125 = jac_affine_inv_0_1_GRAY*-8.0 - tmp_qloop_42; + const real_t tmp_qloop_126 = jac_affine_inv_0_0_GRAY*tmp_qloop_125 - tmp_qloop_100; + const real_t tmp_qloop_127 = jac_affine_inv_0_1_GRAY*tmp_qloop_123 - tmp_qloop_99; + const real_t tmp_qloop_128 = jac_affine_inv_0_1_GRAY*tmp_qloop_125 - tmp_qloop_117; + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = 
macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = 
_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const 
real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wx_dof_1 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_3 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wy_dof_1 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_3 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 
0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = 
tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_47 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_48 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_49 = tmp_qloop_47 + tmp_qloop_48 - 3.0; + const real_t tmp_qloop_50 = jac_affine_inv_0_0_GRAY*tmp_qloop_49 + jac_affine_inv_1_0_GRAY*tmp_qloop_49; + const real_t tmp_qloop_54 = jac_affine_inv_0_1_GRAY*tmp_qloop_49 + jac_affine_inv_1_1_GRAY*tmp_qloop_49; + const real_t tmp_qloop_69 = tmp_qloop_47*_data_q_p_1[q]; + const real_t tmp_qloop_70 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_71 = tmp_qloop_70*2.0; + const real_t tmp_qloop_72 = tmp_qloop_71 - _data_q_p_0[q]; + const real_t tmp_qloop_73 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_74 = tmp_qloop_73*2.0; + const real_t tmp_qloop_75 = tmp_qloop_74 - _data_q_p_1[q]; + const real_t tmp_qloop_76 = tmp_qloop_48 - tmp_qloop_69 + tmp_qloop_73*-4.0; + const real_t tmp_qloop_77 = tmp_qloop_47 - tmp_qloop_69 + tmp_qloop_70*-4.0; + const real_t tmp_qloop_78 = tmp_qloop_69 + tmp_qloop_71 + tmp_qloop_74 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_79 = tmp_qloop_69*wx_dof_3 + tmp_qloop_72*wx_dof_1 + tmp_qloop_75*wx_dof_2 + tmp_qloop_76*wx_dof_4 + tmp_qloop_77*wx_dof_5 + tmp_qloop_78*wx_dof_0; + const real_t tmp_qloop_80 = tmp_qloop_69*wy_dof_3 + tmp_qloop_72*wy_dof_1 + 
tmp_qloop_75*wy_dof_2 + tmp_qloop_76*wy_dof_4 + tmp_qloop_77*wy_dof_5 + tmp_qloop_78*wy_dof_0; + const real_t tmp_qloop_86 = tmp_qloop_47 - 1.0; + const real_t tmp_qloop_87 = jac_affine_inv_0_0_GRAY*tmp_qloop_86; + const real_t tmp_qloop_88 = jac_affine_inv_0_1_GRAY*tmp_qloop_86; + const real_t tmp_qloop_93 = tmp_qloop_48 - 1.0; + const real_t tmp_qloop_94 = jac_affine_inv_1_0_GRAY*tmp_qloop_93; + const real_t tmp_qloop_95 = jac_affine_inv_1_1_GRAY*tmp_qloop_93; + const real_t tmp_qloop_104 = jac_affine_inv_1_0_GRAY*tmp_qloop_47; + const real_t tmp_qloop_105 = jac_affine_inv_0_0_GRAY*tmp_qloop_48; + const real_t tmp_qloop_106 = tmp_qloop_104 + tmp_qloop_105; + const real_t tmp_qloop_107 = jac_affine_inv_1_1_GRAY*tmp_qloop_47; + const real_t tmp_qloop_108 = jac_affine_inv_0_1_GRAY*tmp_qloop_48; + const real_t tmp_qloop_109 = tmp_qloop_107 + tmp_qloop_108; + const real_t tmp_qloop_119 = -tmp_qloop_47 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_120 = jac_affine_inv_1_0_GRAY*tmp_qloop_119 - tmp_qloop_105; + const real_t tmp_qloop_121 = jac_affine_inv_1_1_GRAY*tmp_qloop_119 - tmp_qloop_108; + const real_t tmp_qloop_129 = -tmp_qloop_48 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_130 = jac_affine_inv_0_0_GRAY*tmp_qloop_129 - tmp_qloop_104; + const real_t tmp_qloop_131 = jac_affine_inv_0_1_GRAY*tmp_qloop_129 - tmp_qloop_107; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_81 = 
abs_det_jac_affine_GRAY*abs_det_jac_blending*(diffusivity_times_delta_dof_0*tmp_qloop_78 + diffusivity_times_delta_dof_1*tmp_qloop_72 + diffusivity_times_delta_dof_2*tmp_qloop_75 + diffusivity_times_delta_dof_3*tmp_qloop_69 + diffusivity_times_delta_dof_4*tmp_qloop_76 + diffusivity_times_delta_dof_5*tmp_qloop_77)*_data_q_w[q]; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t tmp_qloop_82 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_50 + jac_blending_inv_1_0*tmp_qloop_54) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_50 + jac_blending_inv_1_1*tmp_qloop_54)); + const real_t tmp_qloop_133 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_87 + jac_blending_inv_1_0*tmp_qloop_88) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_87 + jac_blending_inv_1_1*tmp_qloop_88)); + const real_t tmp_qloop_134 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_94 + jac_blending_inv_1_0*tmp_qloop_95) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_94 + jac_blending_inv_1_1*tmp_qloop_95)); + const real_t tmp_qloop_135 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_106 + jac_blending_inv_1_0*tmp_qloop_109) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_106 + jac_blending_inv_1_1*tmp_qloop_109)); + const real_t tmp_qloop_136 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_120 + jac_blending_inv_1_0*tmp_qloop_121) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_120 + jac_blending_inv_1_1*tmp_qloop_121)); + const real_t tmp_qloop_137 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_130 + jac_blending_inv_1_0*tmp_qloop_131) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_130 + jac_blending_inv_1_1*tmp_qloop_131)); + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 
+ tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t tmp_qloop_51 = -hessian_blending_0_0_0*jac_blending_inv_0_0 - hessian_blending_0_1_0*jac_blending_inv_1_0; + const real_t tmp_qloop_60 = -hessian_blending_0_0_0*jac_blending_inv_0_1 - hessian_blending_0_1_0*jac_blending_inv_1_1; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t tmp_qloop_56 = -hessian_blending_1_0_0*jac_blending_inv_0_0 - hessian_blending_1_1_0*jac_blending_inv_1_0; + const real_t tmp_qloop_64 = -hessian_blending_1_0_0*jac_blending_inv_0_1 - hessian_blending_1_1_0*jac_blending_inv_1_1; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t tmp_qloop_52 = -hessian_blending_0_0_1*jac_blending_inv_0_0 - hessian_blending_0_1_1*jac_blending_inv_1_0; + const real_t tmp_qloop_53 = jac_blending_inv_0_0*tmp_qloop_51 + jac_blending_inv_0_1*tmp_qloop_52; + const real_t tmp_qloop_55 = jac_blending_inv_1_0*tmp_qloop_51 + jac_blending_inv_1_1*tmp_qloop_52; + const real_t tmp_qloop_61 = -hessian_blending_0_0_1*jac_blending_inv_0_1 - hessian_blending_0_1_1*jac_blending_inv_1_1; + const real_t tmp_qloop_62 = jac_blending_inv_0_0*tmp_qloop_60 + jac_blending_inv_0_1*tmp_qloop_61; + const real_t tmp_qloop_63 = jac_blending_inv_1_0*tmp_qloop_60 + jac_blending_inv_1_1*tmp_qloop_61; + const real_t 
hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t tmp_qloop_57 = -hessian_blending_1_0_1*jac_blending_inv_0_0 - hessian_blending_1_1_1*jac_blending_inv_1_0; + const real_t tmp_qloop_58 = jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_0_1*tmp_qloop_57; + const real_t tmp_qloop_59 = jac_blending_inv_1_0*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57; + const real_t tmp_qloop_65 = -hessian_blending_1_0_1*jac_blending_inv_0_1 - hessian_blending_1_1_1*jac_blending_inv_1_1; + const real_t tmp_qloop_66 = jac_blending_inv_0_0*tmp_qloop_64 + jac_blending_inv_0_1*tmp_qloop_65; + const real_t tmp_qloop_67 = jac_blending_inv_1_0*tmp_qloop_64 + jac_blending_inv_1_1*tmp_qloop_65; + const real_t tmp_qloop_68 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_44) + jac_blending_inv_0_0*(tmp_qloop_50*tmp_qloop_53 + tmp_qloop_54*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_44) + jac_blending_inv_0_1*(tmp_qloop_50*tmp_qloop_58 + tmp_qloop_54*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_45 + jac_blending_inv_1_0*tmp_qloop_46) + jac_blending_inv_1_0*(tmp_qloop_50*tmp_qloop_62 + tmp_qloop_54*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_45 + jac_blending_inv_1_1*tmp_qloop_46) + jac_blending_inv_1_1*(tmp_qloop_50*tmp_qloop_66 + tmp_qloop_54*tmp_qloop_67); + const real_t tmp_qloop_89 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_84 + jac_blending_inv_1_0*tmp_qloop_83) + jac_blending_inv_0_0*(tmp_qloop_53*tmp_qloop_87 + tmp_qloop_55*tmp_qloop_88) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_84 + jac_blending_inv_1_1*tmp_qloop_83) + jac_blending_inv_0_1*(tmp_qloop_58*tmp_qloop_87 + tmp_qloop_59*tmp_qloop_88) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_83 + 
jac_blending_inv_1_0*tmp_qloop_85) + jac_blending_inv_1_0*(tmp_qloop_62*tmp_qloop_87 + tmp_qloop_63*tmp_qloop_88) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_83 + jac_blending_inv_1_1*tmp_qloop_85) + jac_blending_inv_1_1*(tmp_qloop_66*tmp_qloop_87 + tmp_qloop_67*tmp_qloop_88); + const real_t tmp_qloop_96 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_91 + jac_blending_inv_1_0*tmp_qloop_90) + jac_blending_inv_0_0*(tmp_qloop_53*tmp_qloop_94 + tmp_qloop_55*tmp_qloop_95) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_91 + jac_blending_inv_1_1*tmp_qloop_90) + jac_blending_inv_0_1*(tmp_qloop_58*tmp_qloop_94 + tmp_qloop_59*tmp_qloop_95) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_90 + jac_blending_inv_1_0*tmp_qloop_92) + jac_blending_inv_1_0*(tmp_qloop_62*tmp_qloop_94 + tmp_qloop_63*tmp_qloop_95) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_90 + jac_blending_inv_1_1*tmp_qloop_92) + jac_blending_inv_1_1*(tmp_qloop_66*tmp_qloop_94 + tmp_qloop_67*tmp_qloop_95); + const real_t tmp_qloop_110 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_98 + jac_blending_inv_1_0*tmp_qloop_101) + jac_blending_inv_0_0*(tmp_qloop_106*tmp_qloop_53 + tmp_qloop_109*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_98 + jac_blending_inv_1_1*tmp_qloop_101) + jac_blending_inv_0_1*(tmp_qloop_106*tmp_qloop_58 + tmp_qloop_109*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_101 + jac_blending_inv_1_0*tmp_qloop_103) + jac_blending_inv_1_0*(tmp_qloop_106*tmp_qloop_62 + tmp_qloop_109*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_101 + jac_blending_inv_1_1*tmp_qloop_103) + jac_blending_inv_1_1*(tmp_qloop_106*tmp_qloop_66 + tmp_qloop_109*tmp_qloop_67); + const real_t tmp_qloop_122 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_113 + jac_blending_inv_1_0*tmp_qloop_115) + jac_blending_inv_0_0*(tmp_qloop_120*tmp_qloop_53 + tmp_qloop_121*tmp_qloop_55) + 
jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_113 + jac_blending_inv_1_1*tmp_qloop_115) + jac_blending_inv_0_1*(tmp_qloop_120*tmp_qloop_58 + tmp_qloop_121*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_116 + jac_blending_inv_1_0*tmp_qloop_118) + jac_blending_inv_1_0*(tmp_qloop_120*tmp_qloop_62 + tmp_qloop_121*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_116 + jac_blending_inv_1_1*tmp_qloop_118) + jac_blending_inv_1_1*(tmp_qloop_120*tmp_qloop_66 + tmp_qloop_121*tmp_qloop_67); + const real_t tmp_qloop_132 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_124 + jac_blending_inv_1_0*tmp_qloop_126) + jac_blending_inv_0_0*(tmp_qloop_130*tmp_qloop_53 + tmp_qloop_131*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_124 + jac_blending_inv_1_1*tmp_qloop_126) + jac_blending_inv_0_1*(tmp_qloop_130*tmp_qloop_58 + tmp_qloop_131*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_127 + jac_blending_inv_1_0*tmp_qloop_128) + jac_blending_inv_1_0*(tmp_qloop_130*tmp_qloop_62 + tmp_qloop_131*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_127 + jac_blending_inv_1_1*tmp_qloop_128) + jac_blending_inv_1_1*(tmp_qloop_130*tmp_qloop_66 + tmp_qloop_131*tmp_qloop_67); + const real_t q_tmp_0_0 = tmp_qloop_68*tmp_qloop_82; + const real_t q_tmp_0_1 = tmp_qloop_82*tmp_qloop_89; + const real_t q_tmp_0_2 = tmp_qloop_82*tmp_qloop_96; + const real_t q_tmp_0_3 = tmp_qloop_110*tmp_qloop_82; + const real_t q_tmp_0_4 = tmp_qloop_122*tmp_qloop_82; + const real_t q_tmp_0_5 = tmp_qloop_132*tmp_qloop_82; + const real_t q_tmp_1_0 = tmp_qloop_133*tmp_qloop_68; + const real_t q_tmp_1_1 = tmp_qloop_133*tmp_qloop_89; + const real_t q_tmp_1_2 = tmp_qloop_133*tmp_qloop_96; + const real_t q_tmp_1_3 = tmp_qloop_110*tmp_qloop_133; + const real_t q_tmp_1_4 = tmp_qloop_122*tmp_qloop_133; + const real_t q_tmp_1_5 = tmp_qloop_132*tmp_qloop_133; + const real_t q_tmp_2_0 = tmp_qloop_134*tmp_qloop_68; + const 
real_t q_tmp_2_1 = tmp_qloop_134*tmp_qloop_89; + const real_t q_tmp_2_2 = tmp_qloop_134*tmp_qloop_96; + const real_t q_tmp_2_3 = tmp_qloop_110*tmp_qloop_134; + const real_t q_tmp_2_4 = tmp_qloop_122*tmp_qloop_134; + const real_t q_tmp_2_5 = tmp_qloop_132*tmp_qloop_134; + const real_t q_tmp_3_0 = tmp_qloop_135*tmp_qloop_68; + const real_t q_tmp_3_1 = tmp_qloop_135*tmp_qloop_89; + const real_t q_tmp_3_2 = tmp_qloop_135*tmp_qloop_96; + const real_t q_tmp_3_3 = tmp_qloop_110*tmp_qloop_135; + const real_t q_tmp_3_4 = tmp_qloop_122*tmp_qloop_135; + const real_t q_tmp_3_5 = tmp_qloop_132*tmp_qloop_135; + const real_t q_tmp_4_0 = tmp_qloop_136*tmp_qloop_68; + const real_t q_tmp_4_1 = tmp_qloop_136*tmp_qloop_89; + const real_t q_tmp_4_2 = tmp_qloop_136*tmp_qloop_96; + const real_t q_tmp_4_3 = tmp_qloop_110*tmp_qloop_136; + const real_t q_tmp_4_4 = tmp_qloop_122*tmp_qloop_136; + const real_t q_tmp_4_5 = tmp_qloop_132*tmp_qloop_136; + const real_t q_tmp_5_0 = tmp_qloop_137*tmp_qloop_68; + const real_t q_tmp_5_1 = tmp_qloop_137*tmp_qloop_89; + const real_t q_tmp_5_2 = tmp_qloop_137*tmp_qloop_96; + const real_t q_tmp_5_3 = tmp_qloop_110*tmp_qloop_137; + const real_t q_tmp_5_4 = tmp_qloop_122*tmp_qloop_137; + const real_t q_tmp_5_5 = tmp_qloop_132*tmp_qloop_137; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + 
q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5; + const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5; + const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + 
_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + 
tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + const real_t tmp_moved_constant_0 = jac_affine_inv_0_0_BLUE*4.0; + const real_t tmp_moved_constant_1 = jac_affine_inv_1_0_BLUE*4.0; + const real_t tmp_moved_constant_2 = tmp_moved_constant_0 + tmp_moved_constant_1; + const real_t tmp_moved_constant_3 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_2; + const real_t tmp_moved_constant_4 = jac_affine_inv_0_1_BLUE*4.0; + const real_t tmp_moved_constant_5 = jac_affine_inv_1_1_BLUE*4.0; + const real_t tmp_moved_constant_6 = tmp_moved_constant_4 + tmp_moved_constant_5; + const real_t tmp_moved_constant_7 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_6 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_6; + const real_t tmp_moved_constant_8 = 
jac_affine_inv_0_1_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_2; + const real_t tmp_moved_constant_9 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_6 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_6; + const real_t tmp_moved_constant_10 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_0; + const real_t tmp_moved_constant_11 = (jac_affine_inv_0_0_BLUE*jac_affine_inv_0_0_BLUE)*4.0; + const real_t tmp_moved_constant_12 = (jac_affine_inv_0_1_BLUE*jac_affine_inv_0_1_BLUE)*4.0; + const real_t tmp_moved_constant_13 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_1; + const real_t tmp_moved_constant_14 = (jac_affine_inv_1_0_BLUE*jac_affine_inv_1_0_BLUE)*4.0; + const real_t tmp_moved_constant_15 = (jac_affine_inv_1_1_BLUE*jac_affine_inv_1_1_BLUE)*4.0; + const real_t tmp_moved_constant_16 = jac_affine_inv_1_0_BLUE*8.0; + const real_t tmp_moved_constant_17 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_16; + const real_t tmp_moved_constant_18 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_0; + const real_t tmp_moved_constant_19 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_1; + const real_t tmp_moved_constant_20 = tmp_moved_constant_18 + tmp_moved_constant_19; + const real_t tmp_moved_constant_21 = jac_affine_inv_1_1_BLUE*8.0; + const real_t tmp_moved_constant_22 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_21; + const real_t tmp_moved_constant_23 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_0; + const real_t tmp_moved_constant_24 = -tmp_moved_constant_0 - tmp_moved_constant_16; + const real_t tmp_moved_constant_25 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_24 - tmp_moved_constant_23; + const real_t tmp_moved_constant_26 = -tmp_moved_constant_21 - tmp_moved_constant_4; + const real_t tmp_moved_constant_27 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_26 - tmp_moved_constant_18; + const real_t tmp_moved_constant_28 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_24 - tmp_moved_constant_19; + const real_t tmp_moved_constant_29 = 
jac_affine_inv_1_1_BLUE*tmp_moved_constant_4; + const real_t tmp_moved_constant_30 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_26 - tmp_moved_constant_29; + const real_t tmp_moved_constant_31 = jac_affine_inv_0_0_BLUE*-8.0 - tmp_moved_constant_1; + const real_t tmp_moved_constant_32 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_31 - tmp_moved_constant_23; + const real_t tmp_moved_constant_33 = jac_affine_inv_0_1_BLUE*-8.0 - tmp_moved_constant_5; + const real_t tmp_moved_constant_34 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_33 - tmp_moved_constant_19; + const real_t tmp_moved_constant_35 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_31 - tmp_moved_constant_18; + const real_t tmp_moved_constant_36 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_33 - tmp_moved_constant_29; + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = 
macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const 
real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 
((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wx_dof_1 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t wx_dof_3 = _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wy_dof_1 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t wy_dof_3 = _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + 
real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const 
real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_47 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_48 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_49 = tmp_qloop_47 + tmp_qloop_48 - 3.0; + const real_t tmp_qloop_50 = jac_affine_inv_0_0_BLUE*tmp_qloop_49 + jac_affine_inv_1_0_BLUE*tmp_qloop_49; + const real_t tmp_qloop_54 = jac_affine_inv_0_1_BLUE*tmp_qloop_49 + jac_affine_inv_1_1_BLUE*tmp_qloop_49; + const real_t tmp_qloop_69 = tmp_qloop_47*_data_q_p_1[q]; + const real_t tmp_qloop_70 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_71 = tmp_qloop_70*2.0; + const real_t tmp_qloop_72 = tmp_qloop_71 - _data_q_p_0[q]; + const real_t tmp_qloop_73 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_74 = tmp_qloop_73*2.0; + const real_t tmp_qloop_75 = tmp_qloop_74 - _data_q_p_1[q]; + const real_t tmp_qloop_76 = tmp_qloop_48 - tmp_qloop_69 + tmp_qloop_73*-4.0; + const real_t tmp_qloop_77 = tmp_qloop_47 - tmp_qloop_69 + tmp_qloop_70*-4.0; + const real_t tmp_qloop_78 = tmp_qloop_69 + tmp_qloop_71 + tmp_qloop_74 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_79 = tmp_qloop_69*wx_dof_3 + tmp_qloop_72*wx_dof_1 + tmp_qloop_75*wx_dof_2 + tmp_qloop_76*wx_dof_4 + 
tmp_qloop_77*wx_dof_5 + tmp_qloop_78*wx_dof_0; + const real_t tmp_qloop_80 = tmp_qloop_69*wy_dof_3 + tmp_qloop_72*wy_dof_1 + tmp_qloop_75*wy_dof_2 + tmp_qloop_76*wy_dof_4 + tmp_qloop_77*wy_dof_5 + tmp_qloop_78*wy_dof_0; + const real_t tmp_qloop_86 = tmp_qloop_47 - 1.0; + const real_t tmp_qloop_87 = jac_affine_inv_0_0_BLUE*tmp_qloop_86; + const real_t tmp_qloop_88 = jac_affine_inv_0_1_BLUE*tmp_qloop_86; + const real_t tmp_qloop_93 = tmp_qloop_48 - 1.0; + const real_t tmp_qloop_94 = jac_affine_inv_1_0_BLUE*tmp_qloop_93; + const real_t tmp_qloop_95 = jac_affine_inv_1_1_BLUE*tmp_qloop_93; + const real_t tmp_qloop_104 = jac_affine_inv_1_0_BLUE*tmp_qloop_47; + const real_t tmp_qloop_105 = jac_affine_inv_0_0_BLUE*tmp_qloop_48; + const real_t tmp_qloop_106 = tmp_qloop_104 + tmp_qloop_105; + const real_t tmp_qloop_107 = jac_affine_inv_1_1_BLUE*tmp_qloop_47; + const real_t tmp_qloop_108 = jac_affine_inv_0_1_BLUE*tmp_qloop_48; + const real_t tmp_qloop_109 = tmp_qloop_107 + tmp_qloop_108; + const real_t tmp_qloop_119 = -tmp_qloop_47 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_120 = jac_affine_inv_1_0_BLUE*tmp_qloop_119 - tmp_qloop_105; + const real_t tmp_qloop_121 = jac_affine_inv_1_1_BLUE*tmp_qloop_119 - tmp_qloop_108; + const real_t tmp_qloop_129 = -tmp_qloop_48 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_130 = jac_affine_inv_0_0_BLUE*tmp_qloop_129 - tmp_qloop_104; + const real_t tmp_qloop_131 = jac_affine_inv_0_1_BLUE*tmp_qloop_129 - tmp_qloop_107; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t 
tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_81 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(diffusivity_times_delta_dof_0*tmp_qloop_78 + diffusivity_times_delta_dof_1*tmp_qloop_72 + diffusivity_times_delta_dof_2*tmp_qloop_75 + diffusivity_times_delta_dof_3*tmp_qloop_69 + diffusivity_times_delta_dof_4*tmp_qloop_76 + diffusivity_times_delta_dof_5*tmp_qloop_77)*_data_q_w[q]; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t tmp_qloop_82 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_50 + jac_blending_inv_1_0*tmp_qloop_54) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_50 + jac_blending_inv_1_1*tmp_qloop_54)); + const real_t tmp_qloop_133 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_87 + jac_blending_inv_1_0*tmp_qloop_88) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_87 + jac_blending_inv_1_1*tmp_qloop_88)); + const real_t tmp_qloop_134 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_94 + jac_blending_inv_1_0*tmp_qloop_95) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_94 + jac_blending_inv_1_1*tmp_qloop_95)); + const real_t tmp_qloop_135 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_106 + jac_blending_inv_1_0*tmp_qloop_109) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_106 + jac_blending_inv_1_1*tmp_qloop_109)); + const real_t tmp_qloop_136 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_120 + jac_blending_inv_1_0*tmp_qloop_121) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_120 + jac_blending_inv_1_1*tmp_qloop_121)); + const real_t tmp_qloop_137 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_130 + jac_blending_inv_1_0*tmp_qloop_131) + 
tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_130 + jac_blending_inv_1_1*tmp_qloop_131)); + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t tmp_qloop_51 = -hessian_blending_0_0_0*jac_blending_inv_0_0 - hessian_blending_0_1_0*jac_blending_inv_1_0; + const real_t tmp_qloop_60 = -hessian_blending_0_0_0*jac_blending_inv_0_1 - hessian_blending_0_1_0*jac_blending_inv_1_1; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t tmp_qloop_56 = -hessian_blending_1_0_0*jac_blending_inv_0_0 - hessian_blending_1_1_0*jac_blending_inv_1_0; + const real_t tmp_qloop_64 = -hessian_blending_1_0_0*jac_blending_inv_0_1 - hessian_blending_1_1_0*jac_blending_inv_1_1; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t tmp_qloop_52 = -hessian_blending_0_0_1*jac_blending_inv_0_0 - hessian_blending_0_1_1*jac_blending_inv_1_0; + const real_t tmp_qloop_53 = jac_blending_inv_0_0*tmp_qloop_51 + jac_blending_inv_0_1*tmp_qloop_52; + const real_t tmp_qloop_55 = jac_blending_inv_1_0*tmp_qloop_51 + jac_blending_inv_1_1*tmp_qloop_52; + const real_t tmp_qloop_61 = -hessian_blending_0_0_1*jac_blending_inv_0_1 - hessian_blending_0_1_1*jac_blending_inv_1_1; + const real_t tmp_qloop_62 = jac_blending_inv_0_0*tmp_qloop_60 + 
jac_blending_inv_0_1*tmp_qloop_61; + const real_t tmp_qloop_63 = jac_blending_inv_1_0*tmp_qloop_60 + jac_blending_inv_1_1*tmp_qloop_61; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t tmp_qloop_57 = -hessian_blending_1_0_1*jac_blending_inv_0_0 - hessian_blending_1_1_1*jac_blending_inv_1_0; + const real_t tmp_qloop_58 = jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_0_1*tmp_qloop_57; + const real_t tmp_qloop_59 = jac_blending_inv_1_0*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57; + const real_t tmp_qloop_65 = -hessian_blending_1_0_1*jac_blending_inv_0_1 - hessian_blending_1_1_1*jac_blending_inv_1_1; + const real_t tmp_qloop_66 = jac_blending_inv_0_0*tmp_qloop_64 + jac_blending_inv_0_1*tmp_qloop_65; + const real_t tmp_qloop_67 = jac_blending_inv_1_0*tmp_qloop_64 + jac_blending_inv_1_1*tmp_qloop_65; + const real_t tmp_qloop_68 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_3 + jac_blending_inv_1_0*tmp_moved_constant_7) + jac_blending_inv_0_0*(tmp_qloop_50*tmp_qloop_53 + tmp_qloop_54*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_3 + jac_blending_inv_1_1*tmp_moved_constant_7) + jac_blending_inv_0_1*(tmp_qloop_50*tmp_qloop_58 + tmp_qloop_54*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_8 + jac_blending_inv_1_0*tmp_moved_constant_9) + jac_blending_inv_1_0*(tmp_qloop_50*tmp_qloop_62 + tmp_qloop_54*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_8 + jac_blending_inv_1_1*tmp_moved_constant_9) + jac_blending_inv_1_1*(tmp_qloop_50*tmp_qloop_66 + tmp_qloop_54*tmp_qloop_67); + const real_t tmp_qloop_89 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_11 + jac_blending_inv_1_0*tmp_moved_constant_10) + jac_blending_inv_0_0*(tmp_qloop_53*tmp_qloop_87 + tmp_qloop_55*tmp_qloop_88) + 
jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_11 + jac_blending_inv_1_1*tmp_moved_constant_10) + jac_blending_inv_0_1*(tmp_qloop_58*tmp_qloop_87 + tmp_qloop_59*tmp_qloop_88) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_10 + jac_blending_inv_1_0*tmp_moved_constant_12) + jac_blending_inv_1_0*(tmp_qloop_62*tmp_qloop_87 + tmp_qloop_63*tmp_qloop_88) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_10 + jac_blending_inv_1_1*tmp_moved_constant_12) + jac_blending_inv_1_1*(tmp_qloop_66*tmp_qloop_87 + tmp_qloop_67*tmp_qloop_88); + const real_t tmp_qloop_96 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_14 + jac_blending_inv_1_0*tmp_moved_constant_13) + jac_blending_inv_0_0*(tmp_qloop_53*tmp_qloop_94 + tmp_qloop_55*tmp_qloop_95) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_14 + jac_blending_inv_1_1*tmp_moved_constant_13) + jac_blending_inv_0_1*(tmp_qloop_58*tmp_qloop_94 + tmp_qloop_59*tmp_qloop_95) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_13 + jac_blending_inv_1_0*tmp_moved_constant_15) + jac_blending_inv_1_0*(tmp_qloop_62*tmp_qloop_94 + tmp_qloop_63*tmp_qloop_95) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_13 + jac_blending_inv_1_1*tmp_moved_constant_15) + jac_blending_inv_1_1*(tmp_qloop_66*tmp_qloop_94 + tmp_qloop_67*tmp_qloop_95); + const real_t tmp_qloop_110 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_17 + jac_blending_inv_1_0*tmp_moved_constant_20) + jac_blending_inv_0_0*(tmp_qloop_106*tmp_qloop_53 + tmp_qloop_109*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_17 + jac_blending_inv_1_1*tmp_moved_constant_20) + jac_blending_inv_0_1*(tmp_qloop_106*tmp_qloop_58 + tmp_qloop_109*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_20 + jac_blending_inv_1_0*tmp_moved_constant_22) + jac_blending_inv_1_0*(tmp_qloop_106*tmp_qloop_62 + tmp_qloop_109*tmp_qloop_63) + 
jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_20 + jac_blending_inv_1_1*tmp_moved_constant_22) + jac_blending_inv_1_1*(tmp_qloop_106*tmp_qloop_66 + tmp_qloop_109*tmp_qloop_67); + const real_t tmp_qloop_122 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_25 + jac_blending_inv_1_0*tmp_moved_constant_27) + jac_blending_inv_0_0*(tmp_qloop_120*tmp_qloop_53 + tmp_qloop_121*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_25 + jac_blending_inv_1_1*tmp_moved_constant_27) + jac_blending_inv_0_1*(tmp_qloop_120*tmp_qloop_58 + tmp_qloop_121*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_28 + jac_blending_inv_1_0*tmp_moved_constant_30) + jac_blending_inv_1_0*(tmp_qloop_120*tmp_qloop_62 + tmp_qloop_121*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_28 + jac_blending_inv_1_1*tmp_moved_constant_30) + jac_blending_inv_1_1*(tmp_qloop_120*tmp_qloop_66 + tmp_qloop_121*tmp_qloop_67); + const real_t tmp_qloop_132 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_32 + jac_blending_inv_1_0*tmp_moved_constant_34) + jac_blending_inv_0_0*(tmp_qloop_130*tmp_qloop_53 + tmp_qloop_131*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_32 + jac_blending_inv_1_1*tmp_moved_constant_34) + jac_blending_inv_0_1*(tmp_qloop_130*tmp_qloop_58 + tmp_qloop_131*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_35 + jac_blending_inv_1_0*tmp_moved_constant_36) + jac_blending_inv_1_0*(tmp_qloop_130*tmp_qloop_62 + tmp_qloop_131*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_35 + jac_blending_inv_1_1*tmp_moved_constant_36) + jac_blending_inv_1_1*(tmp_qloop_130*tmp_qloop_66 + tmp_qloop_131*tmp_qloop_67); + const real_t q_tmp_0_0 = tmp_qloop_68*tmp_qloop_82; + const real_t q_tmp_0_1 = tmp_qloop_82*tmp_qloop_89; + const real_t q_tmp_0_2 = tmp_qloop_82*tmp_qloop_96; + const real_t q_tmp_0_3 = 
tmp_qloop_110*tmp_qloop_82; + const real_t q_tmp_0_4 = tmp_qloop_122*tmp_qloop_82; + const real_t q_tmp_0_5 = tmp_qloop_132*tmp_qloop_82; + const real_t q_tmp_1_0 = tmp_qloop_133*tmp_qloop_68; + const real_t q_tmp_1_1 = tmp_qloop_133*tmp_qloop_89; + const real_t q_tmp_1_2 = tmp_qloop_133*tmp_qloop_96; + const real_t q_tmp_1_3 = tmp_qloop_110*tmp_qloop_133; + const real_t q_tmp_1_4 = tmp_qloop_122*tmp_qloop_133; + const real_t q_tmp_1_5 = tmp_qloop_132*tmp_qloop_133; + const real_t q_tmp_2_0 = tmp_qloop_134*tmp_qloop_68; + const real_t q_tmp_2_1 = tmp_qloop_134*tmp_qloop_89; + const real_t q_tmp_2_2 = tmp_qloop_134*tmp_qloop_96; + const real_t q_tmp_2_3 = tmp_qloop_110*tmp_qloop_134; + const real_t q_tmp_2_4 = tmp_qloop_122*tmp_qloop_134; + const real_t q_tmp_2_5 = tmp_qloop_132*tmp_qloop_134; + const real_t q_tmp_3_0 = tmp_qloop_135*tmp_qloop_68; + const real_t q_tmp_3_1 = tmp_qloop_135*tmp_qloop_89; + const real_t q_tmp_3_2 = tmp_qloop_135*tmp_qloop_96; + const real_t q_tmp_3_3 = tmp_qloop_110*tmp_qloop_135; + const real_t q_tmp_3_4 = tmp_qloop_122*tmp_qloop_135; + const real_t q_tmp_3_5 = tmp_qloop_132*tmp_qloop_135; + const real_t q_tmp_4_0 = tmp_qloop_136*tmp_qloop_68; + const real_t q_tmp_4_1 = tmp_qloop_136*tmp_qloop_89; + const real_t q_tmp_4_2 = tmp_qloop_136*tmp_qloop_96; + const real_t q_tmp_4_3 = tmp_qloop_110*tmp_qloop_136; + const real_t q_tmp_4_4 = tmp_qloop_122*tmp_qloop_136; + const real_t q_tmp_4_5 = tmp_qloop_132*tmp_qloop_136; + const real_t q_tmp_5_0 = tmp_qloop_137*tmp_qloop_68; + const real_t q_tmp_5_1 = tmp_qloop_137*tmp_qloop_89; + const real_t q_tmp_5_2 = tmp_qloop_137*tmp_qloop_96; + const real_t q_tmp_5_3 = tmp_qloop_110*tmp_qloop_137; + const real_t q_tmp_5_4 = tmp_qloop_122*tmp_qloop_137; + const real_t q_tmp_5_5 = tmp_qloop_132*tmp_qloop_137; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 
= q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5; + const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5; + const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + 
q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp 
b/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..745bff4f979a187f635bd208043455623a25a13a --- /dev/null +++ b/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusionAnnulusMap_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp @@ -0,0 +1,551 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. 
+*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseSupgDiffusionAnnulusMap.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseSupgDiffusionAnnulusMap::computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D( real_t * RESTRICT _data_diffusivity_times_deltaEdge, real_t * RESTRICT _data_diffusivity_times_deltaVertex, real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_wxEdge, real_t * RESTRICT _data_wxVertex, real_t * RESTRICT _data_wyEdge, real_t * RESTRICT _data_wyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + 
macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; + const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_8 = -tmp_qloop_7; + const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); + const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; + const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_11*1.0; + const real_t tmp_qloop_54 = jac_affine_inv_0_0_GRAY*4.0; + const real_t tmp_qloop_55 = jac_affine_inv_1_0_GRAY*4.0; + const real_t tmp_qloop_56 = tmp_qloop_54 + tmp_qloop_55; + const real_t tmp_qloop_57 = jac_affine_inv_0_0_GRAY*tmp_qloop_56 + 
jac_affine_inv_1_0_GRAY*tmp_qloop_56; + const real_t tmp_qloop_58 = jac_affine_inv_0_1_GRAY*4.0; + const real_t tmp_qloop_59 = jac_affine_inv_1_1_GRAY*4.0; + const real_t tmp_qloop_60 = tmp_qloop_58 + tmp_qloop_59; + const real_t tmp_qloop_61 = jac_affine_inv_0_0_GRAY*tmp_qloop_60 + jac_affine_inv_1_0_GRAY*tmp_qloop_60; + const real_t tmp_qloop_62 = jac_affine_inv_0_1_GRAY*tmp_qloop_56 + jac_affine_inv_1_1_GRAY*tmp_qloop_56; + const real_t tmp_qloop_63 = jac_affine_inv_0_1_GRAY*tmp_qloop_60 + jac_affine_inv_1_1_GRAY*tmp_qloop_60; + const real_t tmp_qloop_84 = jac_affine_inv_0_1_GRAY*tmp_qloop_54; + const real_t tmp_qloop_85 = (jac_affine_inv_0_0_GRAY*jac_affine_inv_0_0_GRAY)*4.0; + const real_t tmp_qloop_86 = (jac_affine_inv_0_1_GRAY*jac_affine_inv_0_1_GRAY)*4.0; + const real_t tmp_qloop_90 = jac_affine_inv_1_1_GRAY*tmp_qloop_55; + const real_t tmp_qloop_91 = (jac_affine_inv_1_0_GRAY*jac_affine_inv_1_0_GRAY)*4.0; + const real_t tmp_qloop_92 = (jac_affine_inv_1_1_GRAY*jac_affine_inv_1_1_GRAY)*4.0; + const real_t tmp_qloop_99 = jac_affine_inv_1_0_GRAY*8.0; + const real_t tmp_qloop_100 = jac_affine_inv_0_0_GRAY*tmp_qloop_99; + const real_t tmp_qloop_101 = jac_affine_inv_1_1_GRAY*tmp_qloop_54; + const real_t tmp_qloop_102 = jac_affine_inv_0_1_GRAY*tmp_qloop_55; + const real_t tmp_qloop_103 = tmp_qloop_101 + tmp_qloop_102; + const real_t tmp_qloop_104 = jac_affine_inv_1_1_GRAY*8.0; + const real_t tmp_qloop_105 = jac_affine_inv_0_1_GRAY*tmp_qloop_104; + const real_t tmp_qloop_109 = jac_affine_inv_1_0_GRAY*tmp_qloop_54; + const real_t tmp_qloop_110 = -tmp_qloop_54 - tmp_qloop_99; + const real_t tmp_qloop_111 = jac_affine_inv_1_0_GRAY*tmp_qloop_110 - tmp_qloop_109; + const real_t tmp_qloop_112 = -tmp_qloop_104 - tmp_qloop_58; + const real_t tmp_qloop_113 = jac_affine_inv_1_0_GRAY*tmp_qloop_112 - tmp_qloop_101; + const real_t tmp_qloop_114 = jac_affine_inv_1_1_GRAY*tmp_qloop_110 - tmp_qloop_102; + const real_t tmp_qloop_115 = jac_affine_inv_1_1_GRAY*tmp_qloop_58; + const 
real_t tmp_qloop_116 = jac_affine_inv_1_1_GRAY*tmp_qloop_112 - tmp_qloop_115; + const real_t tmp_qloop_120 = jac_affine_inv_0_0_GRAY*-8.0 - tmp_qloop_55; + const real_t tmp_qloop_121 = jac_affine_inv_0_0_GRAY*tmp_qloop_120 - tmp_qloop_109; + const real_t tmp_qloop_122 = jac_affine_inv_0_1_GRAY*-8.0 - tmp_qloop_59; + const real_t tmp_qloop_123 = jac_affine_inv_0_0_GRAY*tmp_qloop_122 - tmp_qloop_102; + const real_t tmp_qloop_124 = jac_affine_inv_0_1_GRAY*tmp_qloop_120 - tmp_qloop_101; + const real_t tmp_qloop_125 = jac_affine_inv_0_1_GRAY*tmp_qloop_122 - tmp_qloop_115; + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / 
(micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_1 = 
_data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wx_dof_1 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_3 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wy_dof_1 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 
2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_3 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t 
tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0; + const real_t tmp_qloop_40 = jac_affine_inv_0_0_GRAY*tmp_qloop_39 + jac_affine_inv_1_0_GRAY*tmp_qloop_39; + const real_t tmp_qloop_41 = jac_affine_inv_0_1_GRAY*tmp_qloop_39 + jac_affine_inv_1_1_GRAY*tmp_qloop_39; + const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_44 = tmp_qloop_43*2.0; + const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q]; + const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_47 = tmp_qloop_46*2.0; + const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q]; + const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0; + const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0; + const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_52 = tmp_qloop_42*wx_dof_3 + tmp_qloop_45*wx_dof_1 + tmp_qloop_48*wx_dof_2 + tmp_qloop_49*wx_dof_4 + 
tmp_qloop_50*wx_dof_5 + tmp_qloop_51*wx_dof_0; + const real_t tmp_qloop_53 = tmp_qloop_42*wy_dof_3 + tmp_qloop_45*wy_dof_1 + tmp_qloop_48*wy_dof_2 + tmp_qloop_49*wy_dof_4 + tmp_qloop_50*wy_dof_5 + tmp_qloop_51*wy_dof_0; + const real_t tmp_qloop_81 = tmp_qloop_37 - 1.0; + const real_t tmp_qloop_82 = jac_affine_inv_0_0_GRAY*tmp_qloop_81; + const real_t tmp_qloop_83 = jac_affine_inv_0_1_GRAY*tmp_qloop_81; + const real_t tmp_qloop_87 = tmp_qloop_38 - 1.0; + const real_t tmp_qloop_88 = jac_affine_inv_1_0_GRAY*tmp_qloop_87; + const real_t tmp_qloop_89 = jac_affine_inv_1_1_GRAY*tmp_qloop_87; + const real_t tmp_qloop_93 = jac_affine_inv_1_0_GRAY*tmp_qloop_37; + const real_t tmp_qloop_94 = jac_affine_inv_0_0_GRAY*tmp_qloop_38; + const real_t tmp_qloop_95 = tmp_qloop_93 + tmp_qloop_94; + const real_t tmp_qloop_96 = jac_affine_inv_1_1_GRAY*tmp_qloop_37; + const real_t tmp_qloop_97 = jac_affine_inv_0_1_GRAY*tmp_qloop_38; + const real_t tmp_qloop_98 = tmp_qloop_96 + tmp_qloop_97; + const real_t tmp_qloop_106 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_107 = jac_affine_inv_1_0_GRAY*tmp_qloop_106 - tmp_qloop_94; + const real_t tmp_qloop_108 = jac_affine_inv_1_1_GRAY*tmp_qloop_106 - tmp_qloop_97; + const real_t tmp_qloop_117 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_118 = jac_affine_inv_0_0_GRAY*tmp_qloop_117 - tmp_qloop_93; + const real_t tmp_qloop_119 = jac_affine_inv_0_1_GRAY*tmp_qloop_117 - tmp_qloop_96; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / 
(tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_80 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(diffusivity_times_delta_dof_0*tmp_qloop_51 + diffusivity_times_delta_dof_1*tmp_qloop_45 + diffusivity_times_delta_dof_2*tmp_qloop_48 + diffusivity_times_delta_dof_3*tmp_qloop_42 + diffusivity_times_delta_dof_4*tmp_qloop_49 + diffusivity_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q]; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t tmp_qloop_64 = -hessian_blending_0_0_0*jac_blending_inv_0_0 - hessian_blending_0_1_0*jac_blending_inv_1_0; + const real_t tmp_qloop_72 = -hessian_blending_0_0_0*jac_blending_inv_0_1 - hessian_blending_0_1_0*jac_blending_inv_1_1; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t tmp_qloop_68 = -hessian_blending_1_0_0*jac_blending_inv_0_0 - hessian_blending_1_1_0*jac_blending_inv_1_0; + const real_t tmp_qloop_76 = -hessian_blending_1_0_0*jac_blending_inv_0_1 - hessian_blending_1_1_0*jac_blending_inv_1_1; + const real_t hessian_blending_0_1_1 = 
tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t tmp_qloop_65 = -hessian_blending_0_0_1*jac_blending_inv_0_0 - hessian_blending_0_1_1*jac_blending_inv_1_0; + const real_t tmp_qloop_66 = jac_blending_inv_0_0*tmp_qloop_64 + jac_blending_inv_0_1*tmp_qloop_65; + const real_t tmp_qloop_67 = jac_blending_inv_1_0*tmp_qloop_64 + jac_blending_inv_1_1*tmp_qloop_65; + const real_t tmp_qloop_73 = -hessian_blending_0_0_1*jac_blending_inv_0_1 - hessian_blending_0_1_1*jac_blending_inv_1_1; + const real_t tmp_qloop_74 = jac_blending_inv_0_0*tmp_qloop_72 + jac_blending_inv_0_1*tmp_qloop_73; + const real_t tmp_qloop_75 = jac_blending_inv_1_0*tmp_qloop_72 + jac_blending_inv_1_1*tmp_qloop_73; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t tmp_qloop_69 = -hessian_blending_1_0_1*jac_blending_inv_0_0 - hessian_blending_1_1_1*jac_blending_inv_1_0; + const real_t tmp_qloop_70 = jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_0_1*tmp_qloop_69; + const real_t tmp_qloop_71 = jac_blending_inv_1_0*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69; + const real_t tmp_qloop_77 = -hessian_blending_1_0_1*jac_blending_inv_0_1 - hessian_blending_1_1_1*jac_blending_inv_1_1; + const real_t tmp_qloop_78 = jac_blending_inv_0_0*tmp_qloop_76 + jac_blending_inv_0_1*tmp_qloop_77; + const real_t tmp_qloop_79 = jac_blending_inv_1_0*tmp_qloop_76 + jac_blending_inv_1_1*tmp_qloop_77; + const real_t q_tmp_0_0 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_57 + jac_blending_inv_1_0*tmp_qloop_61) + jac_blending_inv_0_0*(tmp_qloop_40*tmp_qloop_66 + tmp_qloop_41*tmp_qloop_67) + 
jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_57 + jac_blending_inv_1_1*tmp_qloop_61) + jac_blending_inv_0_1*(tmp_qloop_40*tmp_qloop_70 + tmp_qloop_41*tmp_qloop_71) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_62 + jac_blending_inv_1_0*tmp_qloop_63) + jac_blending_inv_1_0*(tmp_qloop_40*tmp_qloop_74 + tmp_qloop_41*tmp_qloop_75) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_62 + jac_blending_inv_1_1*tmp_qloop_63) + jac_blending_inv_1_1*(tmp_qloop_40*tmp_qloop_78 + tmp_qloop_41*tmp_qloop_79)); + const real_t q_tmp_1_1 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_82 + jac_blending_inv_1_0*tmp_qloop_83) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_82 + jac_blending_inv_1_1*tmp_qloop_83))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_85 + jac_blending_inv_1_0*tmp_qloop_84) + jac_blending_inv_0_0*(tmp_qloop_66*tmp_qloop_82 + tmp_qloop_67*tmp_qloop_83) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_85 + jac_blending_inv_1_1*tmp_qloop_84) + jac_blending_inv_0_1*(tmp_qloop_70*tmp_qloop_82 + tmp_qloop_71*tmp_qloop_83) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_84 + jac_blending_inv_1_0*tmp_qloop_86) + jac_blending_inv_1_0*(tmp_qloop_74*tmp_qloop_82 + tmp_qloop_75*tmp_qloop_83) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_84 + jac_blending_inv_1_1*tmp_qloop_86) + jac_blending_inv_1_1*(tmp_qloop_78*tmp_qloop_82 + tmp_qloop_79*tmp_qloop_83)); + const real_t q_tmp_2_2 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_88 + jac_blending_inv_1_0*tmp_qloop_89) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_88 + jac_blending_inv_1_1*tmp_qloop_89))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_91 + jac_blending_inv_1_0*tmp_qloop_90) + jac_blending_inv_0_0*(tmp_qloop_66*tmp_qloop_88 + tmp_qloop_67*tmp_qloop_89) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_91 + jac_blending_inv_1_1*tmp_qloop_90) + jac_blending_inv_0_1*(tmp_qloop_70*tmp_qloop_88 + tmp_qloop_71*tmp_qloop_89) + 
jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_90 + jac_blending_inv_1_0*tmp_qloop_92) + jac_blending_inv_1_0*(tmp_qloop_74*tmp_qloop_88 + tmp_qloop_75*tmp_qloop_89) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_90 + jac_blending_inv_1_1*tmp_qloop_92) + jac_blending_inv_1_1*(tmp_qloop_78*tmp_qloop_88 + tmp_qloop_79*tmp_qloop_89)); + const real_t q_tmp_3_3 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_95 + jac_blending_inv_1_0*tmp_qloop_98) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_95 + jac_blending_inv_1_1*tmp_qloop_98))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_100 + jac_blending_inv_1_0*tmp_qloop_103) + jac_blending_inv_0_0*(tmp_qloop_66*tmp_qloop_95 + tmp_qloop_67*tmp_qloop_98) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_100 + jac_blending_inv_1_1*tmp_qloop_103) + jac_blending_inv_0_1*(tmp_qloop_70*tmp_qloop_95 + tmp_qloop_71*tmp_qloop_98) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_103 + jac_blending_inv_1_0*tmp_qloop_105) + jac_blending_inv_1_0*(tmp_qloop_74*tmp_qloop_95 + tmp_qloop_75*tmp_qloop_98) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_103 + jac_blending_inv_1_1*tmp_qloop_105) + jac_blending_inv_1_1*(tmp_qloop_78*tmp_qloop_95 + tmp_qloop_79*tmp_qloop_98)); + const real_t q_tmp_4_4 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_107 + jac_blending_inv_1_0*tmp_qloop_108) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_107 + jac_blending_inv_1_1*tmp_qloop_108))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_111 + jac_blending_inv_1_0*tmp_qloop_113) + jac_blending_inv_0_0*(tmp_qloop_107*tmp_qloop_66 + tmp_qloop_108*tmp_qloop_67) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_111 + jac_blending_inv_1_1*tmp_qloop_113) + jac_blending_inv_0_1*(tmp_qloop_107*tmp_qloop_70 + tmp_qloop_108*tmp_qloop_71) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_114 + jac_blending_inv_1_0*tmp_qloop_116) + jac_blending_inv_1_0*(tmp_qloop_107*tmp_qloop_74 + 
tmp_qloop_108*tmp_qloop_75) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_114 + jac_blending_inv_1_1*tmp_qloop_116) + jac_blending_inv_1_1*(tmp_qloop_107*tmp_qloop_78 + tmp_qloop_108*tmp_qloop_79)); + const real_t q_tmp_5_5 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_118 + jac_blending_inv_1_0*tmp_qloop_119) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_118 + jac_blending_inv_1_1*tmp_qloop_119))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_121 + jac_blending_inv_1_0*tmp_qloop_123) + jac_blending_inv_0_0*(tmp_qloop_118*tmp_qloop_66 + tmp_qloop_119*tmp_qloop_67) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_121 + jac_blending_inv_1_1*tmp_qloop_123) + jac_blending_inv_0_1*(tmp_qloop_118*tmp_qloop_70 + tmp_qloop_119*tmp_qloop_71) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_124 + jac_blending_inv_1_0*tmp_qloop_125) + jac_blending_inv_1_0*(tmp_qloop_118*tmp_qloop_74 + tmp_qloop_119*tmp_qloop_75) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_124 + jac_blending_inv_1_1*tmp_qloop_125) + jac_blending_inv_1_1*(tmp_qloop_118*tmp_qloop_78 + tmp_qloop_119*tmp_qloop_79)); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatDiag_0 = q_acc_0_0; + const real_t elMatDiag_1 = q_acc_1_1; + const real_t elMatDiag_2 = q_acc_2_2; + const real_t elMatDiag_3 = q_acc_3_3; + const real_t elMatDiag_4 = q_acc_4_4; + const real_t elMatDiag_5 = q_acc_5_5; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t 
p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + const real_t tmp_moved_constant_0 = jac_affine_inv_0_0_BLUE*4.0; + const real_t tmp_moved_constant_1 = jac_affine_inv_1_0_BLUE*4.0; + const real_t tmp_moved_constant_2 = tmp_moved_constant_0 + tmp_moved_constant_1; + const real_t tmp_moved_constant_3 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_2; + const real_t tmp_moved_constant_4 = jac_affine_inv_0_1_BLUE*4.0; + const real_t tmp_moved_constant_5 = jac_affine_inv_1_1_BLUE*4.0; + const real_t tmp_moved_constant_6 = tmp_moved_constant_4 + tmp_moved_constant_5; + const real_t tmp_moved_constant_7 = 
jac_affine_inv_0_0_BLUE*tmp_moved_constant_6 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_6; + const real_t tmp_moved_constant_8 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_2; + const real_t tmp_moved_constant_9 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_6 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_6; + const real_t tmp_moved_constant_10 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_0; + const real_t tmp_moved_constant_11 = (jac_affine_inv_0_0_BLUE*jac_affine_inv_0_0_BLUE)*4.0; + const real_t tmp_moved_constant_12 = (jac_affine_inv_0_1_BLUE*jac_affine_inv_0_1_BLUE)*4.0; + const real_t tmp_moved_constant_13 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_1; + const real_t tmp_moved_constant_14 = (jac_affine_inv_1_0_BLUE*jac_affine_inv_1_0_BLUE)*4.0; + const real_t tmp_moved_constant_15 = (jac_affine_inv_1_1_BLUE*jac_affine_inv_1_1_BLUE)*4.0; + const real_t tmp_moved_constant_16 = jac_affine_inv_1_0_BLUE*8.0; + const real_t tmp_moved_constant_17 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_16; + const real_t tmp_moved_constant_18 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_0; + const real_t tmp_moved_constant_19 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_1; + const real_t tmp_moved_constant_20 = tmp_moved_constant_18 + tmp_moved_constant_19; + const real_t tmp_moved_constant_21 = jac_affine_inv_1_1_BLUE*8.0; + const real_t tmp_moved_constant_22 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_21; + const real_t tmp_moved_constant_23 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_0; + const real_t tmp_moved_constant_24 = -tmp_moved_constant_0 - tmp_moved_constant_16; + const real_t tmp_moved_constant_25 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_24 - tmp_moved_constant_23; + const real_t tmp_moved_constant_26 = -tmp_moved_constant_21 - tmp_moved_constant_4; + const real_t tmp_moved_constant_27 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_26 - tmp_moved_constant_18; + const real_t tmp_moved_constant_28 = 
jac_affine_inv_1_1_BLUE*tmp_moved_constant_24 - tmp_moved_constant_19; + const real_t tmp_moved_constant_29 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_4; + const real_t tmp_moved_constant_30 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_26 - tmp_moved_constant_29; + const real_t tmp_moved_constant_31 = jac_affine_inv_0_0_BLUE*-8.0 - tmp_moved_constant_1; + const real_t tmp_moved_constant_32 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_31 - tmp_moved_constant_23; + const real_t tmp_moved_constant_33 = jac_affine_inv_0_1_BLUE*-8.0 - tmp_moved_constant_5; + const real_t tmp_moved_constant_34 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_33 - tmp_moved_constant_19; + const real_t tmp_moved_constant_35 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_31 - tmp_moved_constant_18; + const real_t tmp_moved_constant_36 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_33 - tmp_moved_constant_29; + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + 
macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / 
(micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wx_dof_1 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t wx_dof_3 = _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 
2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wy_dof_1 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t wy_dof_3 = _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; 
+ const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_37 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_38 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38 - 3.0; + const real_t tmp_qloop_40 = jac_affine_inv_0_0_BLUE*tmp_qloop_39 + jac_affine_inv_1_0_BLUE*tmp_qloop_39; + const real_t tmp_qloop_41 = jac_affine_inv_0_1_BLUE*tmp_qloop_39 + jac_affine_inv_1_1_BLUE*tmp_qloop_39; + const real_t tmp_qloop_42 = tmp_qloop_37*_data_q_p_1[q]; + const real_t tmp_qloop_43 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_44 = tmp_qloop_43*2.0; + const real_t tmp_qloop_45 = tmp_qloop_44 - _data_q_p_0[q]; + const real_t tmp_qloop_46 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t 
tmp_qloop_47 = tmp_qloop_46*2.0; + const real_t tmp_qloop_48 = tmp_qloop_47 - _data_q_p_1[q]; + const real_t tmp_qloop_49 = tmp_qloop_38 - tmp_qloop_42 + tmp_qloop_46*-4.0; + const real_t tmp_qloop_50 = tmp_qloop_37 - tmp_qloop_42 + tmp_qloop_43*-4.0; + const real_t tmp_qloop_51 = tmp_qloop_42 + tmp_qloop_44 + tmp_qloop_47 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_52 = tmp_qloop_42*wx_dof_3 + tmp_qloop_45*wx_dof_1 + tmp_qloop_48*wx_dof_2 + tmp_qloop_49*wx_dof_4 + tmp_qloop_50*wx_dof_5 + tmp_qloop_51*wx_dof_0; + const real_t tmp_qloop_53 = tmp_qloop_42*wy_dof_3 + tmp_qloop_45*wy_dof_1 + tmp_qloop_48*wy_dof_2 + tmp_qloop_49*wy_dof_4 + tmp_qloop_50*wy_dof_5 + tmp_qloop_51*wy_dof_0; + const real_t tmp_qloop_81 = tmp_qloop_37 - 1.0; + const real_t tmp_qloop_82 = jac_affine_inv_0_0_BLUE*tmp_qloop_81; + const real_t tmp_qloop_83 = jac_affine_inv_0_1_BLUE*tmp_qloop_81; + const real_t tmp_qloop_87 = tmp_qloop_38 - 1.0; + const real_t tmp_qloop_88 = jac_affine_inv_1_0_BLUE*tmp_qloop_87; + const real_t tmp_qloop_89 = jac_affine_inv_1_1_BLUE*tmp_qloop_87; + const real_t tmp_qloop_93 = jac_affine_inv_1_0_BLUE*tmp_qloop_37; + const real_t tmp_qloop_94 = jac_affine_inv_0_0_BLUE*tmp_qloop_38; + const real_t tmp_qloop_95 = tmp_qloop_93 + tmp_qloop_94; + const real_t tmp_qloop_96 = jac_affine_inv_1_1_BLUE*tmp_qloop_37; + const real_t tmp_qloop_97 = jac_affine_inv_0_1_BLUE*tmp_qloop_38; + const real_t tmp_qloop_98 = tmp_qloop_96 + tmp_qloop_97; + const real_t tmp_qloop_106 = -tmp_qloop_37 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_107 = jac_affine_inv_1_0_BLUE*tmp_qloop_106 - tmp_qloop_94; + const real_t tmp_qloop_108 = jac_affine_inv_1_1_BLUE*tmp_qloop_106 - tmp_qloop_97; + const real_t tmp_qloop_117 = -tmp_qloop_38 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_118 = jac_affine_inv_0_0_BLUE*tmp_qloop_117 - tmp_qloop_93; + const real_t tmp_qloop_119 = jac_affine_inv_0_1_BLUE*tmp_qloop_117 - tmp_qloop_96; + const real_t jac_blending_0_0 
= tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_80 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(diffusivity_times_delta_dof_0*tmp_qloop_51 + diffusivity_times_delta_dof_1*tmp_qloop_45 + diffusivity_times_delta_dof_2*tmp_qloop_48 + diffusivity_times_delta_dof_3*tmp_qloop_42 + diffusivity_times_delta_dof_4*tmp_qloop_49 + diffusivity_times_delta_dof_5*tmp_qloop_50)*_data_q_w[q]; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t tmp_qloop_64 = -hessian_blending_0_0_0*jac_blending_inv_0_0 - hessian_blending_0_1_0*jac_blending_inv_1_0; + const real_t tmp_qloop_72 
= -hessian_blending_0_0_0*jac_blending_inv_0_1 - hessian_blending_0_1_0*jac_blending_inv_1_1; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t tmp_qloop_68 = -hessian_blending_1_0_0*jac_blending_inv_0_0 - hessian_blending_1_1_0*jac_blending_inv_1_0; + const real_t tmp_qloop_76 = -hessian_blending_1_0_0*jac_blending_inv_0_1 - hessian_blending_1_1_0*jac_blending_inv_1_1; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t tmp_qloop_65 = -hessian_blending_0_0_1*jac_blending_inv_0_0 - hessian_blending_0_1_1*jac_blending_inv_1_0; + const real_t tmp_qloop_66 = jac_blending_inv_0_0*tmp_qloop_64 + jac_blending_inv_0_1*tmp_qloop_65; + const real_t tmp_qloop_67 = jac_blending_inv_1_0*tmp_qloop_64 + jac_blending_inv_1_1*tmp_qloop_65; + const real_t tmp_qloop_73 = -hessian_blending_0_0_1*jac_blending_inv_0_1 - hessian_blending_0_1_1*jac_blending_inv_1_1; + const real_t tmp_qloop_74 = jac_blending_inv_0_0*tmp_qloop_72 + jac_blending_inv_0_1*tmp_qloop_73; + const real_t tmp_qloop_75 = jac_blending_inv_1_0*tmp_qloop_72 + jac_blending_inv_1_1*tmp_qloop_73; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t tmp_qloop_69 = -hessian_blending_1_0_1*jac_blending_inv_0_0 - hessian_blending_1_1_1*jac_blending_inv_1_0; + const real_t tmp_qloop_70 = jac_blending_inv_0_0*tmp_qloop_68 + jac_blending_inv_0_1*tmp_qloop_69; + const real_t tmp_qloop_71 = jac_blending_inv_1_0*tmp_qloop_68 + jac_blending_inv_1_1*tmp_qloop_69; + const real_t tmp_qloop_77 = -hessian_blending_1_0_1*jac_blending_inv_0_1 - hessian_blending_1_1_1*jac_blending_inv_1_1; + const real_t tmp_qloop_78 = jac_blending_inv_0_0*tmp_qloop_76 + jac_blending_inv_0_1*tmp_qloop_77; + const 
real_t tmp_qloop_79 = jac_blending_inv_1_0*tmp_qloop_76 + jac_blending_inv_1_1*tmp_qloop_77; + const real_t q_tmp_0_0 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_41) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_41))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_3 + jac_blending_inv_1_0*tmp_moved_constant_7) + jac_blending_inv_0_0*(tmp_qloop_40*tmp_qloop_66 + tmp_qloop_41*tmp_qloop_67) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_3 + jac_blending_inv_1_1*tmp_moved_constant_7) + jac_blending_inv_0_1*(tmp_qloop_40*tmp_qloop_70 + tmp_qloop_41*tmp_qloop_71) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_8 + jac_blending_inv_1_0*tmp_moved_constant_9) + jac_blending_inv_1_0*(tmp_qloop_40*tmp_qloop_74 + tmp_qloop_41*tmp_qloop_75) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_8 + jac_blending_inv_1_1*tmp_moved_constant_9) + jac_blending_inv_1_1*(tmp_qloop_40*tmp_qloop_78 + tmp_qloop_41*tmp_qloop_79)); + const real_t q_tmp_1_1 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_82 + jac_blending_inv_1_0*tmp_qloop_83) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_82 + jac_blending_inv_1_1*tmp_qloop_83))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_11 + jac_blending_inv_1_0*tmp_moved_constant_10) + jac_blending_inv_0_0*(tmp_qloop_66*tmp_qloop_82 + tmp_qloop_67*tmp_qloop_83) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_11 + jac_blending_inv_1_1*tmp_moved_constant_10) + jac_blending_inv_0_1*(tmp_qloop_70*tmp_qloop_82 + tmp_qloop_71*tmp_qloop_83) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_10 + jac_blending_inv_1_0*tmp_moved_constant_12) + jac_blending_inv_1_0*(tmp_qloop_74*tmp_qloop_82 + tmp_qloop_75*tmp_qloop_83) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_10 + jac_blending_inv_1_1*tmp_moved_constant_12) + 
jac_blending_inv_1_1*(tmp_qloop_78*tmp_qloop_82 + tmp_qloop_79*tmp_qloop_83)); + const real_t q_tmp_2_2 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_88 + jac_blending_inv_1_0*tmp_qloop_89) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_88 + jac_blending_inv_1_1*tmp_qloop_89))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_14 + jac_blending_inv_1_0*tmp_moved_constant_13) + jac_blending_inv_0_0*(tmp_qloop_66*tmp_qloop_88 + tmp_qloop_67*tmp_qloop_89) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_14 + jac_blending_inv_1_1*tmp_moved_constant_13) + jac_blending_inv_0_1*(tmp_qloop_70*tmp_qloop_88 + tmp_qloop_71*tmp_qloop_89) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_13 + jac_blending_inv_1_0*tmp_moved_constant_15) + jac_blending_inv_1_0*(tmp_qloop_74*tmp_qloop_88 + tmp_qloop_75*tmp_qloop_89) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_13 + jac_blending_inv_1_1*tmp_moved_constant_15) + jac_blending_inv_1_1*(tmp_qloop_78*tmp_qloop_88 + tmp_qloop_79*tmp_qloop_89)); + const real_t q_tmp_3_3 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_95 + jac_blending_inv_1_0*tmp_qloop_98) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_95 + jac_blending_inv_1_1*tmp_qloop_98))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_17 + jac_blending_inv_1_0*tmp_moved_constant_20) + jac_blending_inv_0_0*(tmp_qloop_66*tmp_qloop_95 + tmp_qloop_67*tmp_qloop_98) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_17 + jac_blending_inv_1_1*tmp_moved_constant_20) + jac_blending_inv_0_1*(tmp_qloop_70*tmp_qloop_95 + tmp_qloop_71*tmp_qloop_98) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_20 + jac_blending_inv_1_0*tmp_moved_constant_22) + jac_blending_inv_1_0*(tmp_qloop_74*tmp_qloop_95 + tmp_qloop_75*tmp_qloop_98) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_20 + jac_blending_inv_1_1*tmp_moved_constant_22) + 
jac_blending_inv_1_1*(tmp_qloop_78*tmp_qloop_95 + tmp_qloop_79*tmp_qloop_98)); + const real_t q_tmp_4_4 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_107 + jac_blending_inv_1_0*tmp_qloop_108) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_107 + jac_blending_inv_1_1*tmp_qloop_108))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_25 + jac_blending_inv_1_0*tmp_moved_constant_27) + jac_blending_inv_0_0*(tmp_qloop_107*tmp_qloop_66 + tmp_qloop_108*tmp_qloop_67) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_25 + jac_blending_inv_1_1*tmp_moved_constant_27) + jac_blending_inv_0_1*(tmp_qloop_107*tmp_qloop_70 + tmp_qloop_108*tmp_qloop_71) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_28 + jac_blending_inv_1_0*tmp_moved_constant_30) + jac_blending_inv_1_0*(tmp_qloop_107*tmp_qloop_74 + tmp_qloop_108*tmp_qloop_75) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_28 + jac_blending_inv_1_1*tmp_moved_constant_30) + jac_blending_inv_1_1*(tmp_qloop_107*tmp_qloop_78 + tmp_qloop_108*tmp_qloop_79)); + const real_t q_tmp_5_5 = tmp_qloop_80*(tmp_qloop_52*(jac_blending_inv_0_0*tmp_qloop_118 + jac_blending_inv_1_0*tmp_qloop_119) + tmp_qloop_53*(jac_blending_inv_0_1*tmp_qloop_118 + jac_blending_inv_1_1*tmp_qloop_119))*(jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_32 + jac_blending_inv_1_0*tmp_moved_constant_34) + jac_blending_inv_0_0*(tmp_qloop_118*tmp_qloop_66 + tmp_qloop_119*tmp_qloop_67) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_32 + jac_blending_inv_1_1*tmp_moved_constant_34) + jac_blending_inv_0_1*(tmp_qloop_118*tmp_qloop_70 + tmp_qloop_119*tmp_qloop_71) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_35 + jac_blending_inv_1_0*tmp_moved_constant_36) + jac_blending_inv_1_0*(tmp_qloop_118*tmp_qloop_74 + tmp_qloop_119*tmp_qloop_75) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_35 + jac_blending_inv_1_1*tmp_moved_constant_36) + 
jac_blending_inv_1_1*(tmp_qloop_118*tmp_qloop_78 + tmp_qloop_119*tmp_qloop_79)); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatDiag_0 = q_acc_0_0; + const real_t elMatDiag_1 = q_acc_1_1; + const real_t elMatDiag_2 = q_acc_2_2; + const real_t elMatDiag_3 = q_acc_3_3; + const real_t elMatDiag_4 = q_acc_4_4; + const real_t elMatDiag_5 = q_acc_5_5; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 
((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusionAnnulusMap_toMatrix_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp b/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusionAnnulusMap_toMatrix_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..337a42b7ce3e9bf9d3044e030a3dfe3f287bc6c3 --- /dev/null +++ b/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusionAnnulusMap_toMatrix_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D.cpp @@ -0,0 +1,925 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. 
+*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseSupgDiffusionAnnulusMap.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseSupgDiffusionAnnulusMap::toMatrix_P2ElementwiseSupgDiffusionAnnulusMap_macro_2D( real_t * RESTRICT _data_diffusivity_times_deltaEdge, real_t * RESTRICT _data_diffusivity_times_deltaVertex, idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_wxEdge, real_t * RESTRICT _data_wxVertex, real_t * RESTRICT _data_wyEdge, real_t * RESTRICT _data_wyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float, real_t radRayVertex, real_t radRefVertex, real_t rayVertex_0, real_t rayVertex_1, real_t refVertex_0, real_t refVertex_1, real_t thrVertex_0, real_t thrVertex_1 ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + 
tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + const real_t tmp_qloop_1 = -rayVertex_1 + thrVertex_1; + const real_t tmp_qloop_7 = rayVertex_0 - thrVertex_0; + const real_t tmp_qloop_8 = -tmp_qloop_7; + const real_t tmp_qloop_9 = 1.0 / (tmp_qloop_1*(-rayVertex_0 + refVertex_0) - tmp_qloop_8*(-rayVertex_1 + refVertex_1)); + const real_t tmp_qloop_10 = -radRayVertex + radRefVertex; + const real_t tmp_qloop_11 = tmp_qloop_10*tmp_qloop_9; + const real_t tmp_qloop_12 = tmp_qloop_11*1.0; + const real_t tmp_qloop_37 = jac_affine_inv_0_0_GRAY*4.0; + const real_t tmp_qloop_38 = jac_affine_inv_1_0_GRAY*4.0; + const real_t tmp_qloop_39 = tmp_qloop_37 + tmp_qloop_38; + const real_t 
tmp_qloop_40 = jac_affine_inv_0_0_GRAY*tmp_qloop_39 + jac_affine_inv_1_0_GRAY*tmp_qloop_39; + const real_t tmp_qloop_41 = jac_affine_inv_0_1_GRAY*4.0; + const real_t tmp_qloop_42 = jac_affine_inv_1_1_GRAY*4.0; + const real_t tmp_qloop_43 = tmp_qloop_41 + tmp_qloop_42; + const real_t tmp_qloop_44 = jac_affine_inv_0_0_GRAY*tmp_qloop_43 + jac_affine_inv_1_0_GRAY*tmp_qloop_43; + const real_t tmp_qloop_45 = jac_affine_inv_0_1_GRAY*tmp_qloop_39 + jac_affine_inv_1_1_GRAY*tmp_qloop_39; + const real_t tmp_qloop_46 = jac_affine_inv_0_1_GRAY*tmp_qloop_43 + jac_affine_inv_1_1_GRAY*tmp_qloop_43; + const real_t tmp_qloop_83 = jac_affine_inv_0_1_GRAY*tmp_qloop_37; + const real_t tmp_qloop_84 = (jac_affine_inv_0_0_GRAY*jac_affine_inv_0_0_GRAY)*4.0; + const real_t tmp_qloop_85 = (jac_affine_inv_0_1_GRAY*jac_affine_inv_0_1_GRAY)*4.0; + const real_t tmp_qloop_90 = jac_affine_inv_1_1_GRAY*tmp_qloop_38; + const real_t tmp_qloop_91 = (jac_affine_inv_1_0_GRAY*jac_affine_inv_1_0_GRAY)*4.0; + const real_t tmp_qloop_92 = (jac_affine_inv_1_1_GRAY*jac_affine_inv_1_1_GRAY)*4.0; + const real_t tmp_qloop_97 = jac_affine_inv_1_0_GRAY*8.0; + const real_t tmp_qloop_98 = jac_affine_inv_0_0_GRAY*tmp_qloop_97; + const real_t tmp_qloop_99 = jac_affine_inv_1_1_GRAY*tmp_qloop_37; + const real_t tmp_qloop_100 = jac_affine_inv_0_1_GRAY*tmp_qloop_38; + const real_t tmp_qloop_101 = tmp_qloop_100 + tmp_qloop_99; + const real_t tmp_qloop_102 = jac_affine_inv_1_1_GRAY*8.0; + const real_t tmp_qloop_103 = jac_affine_inv_0_1_GRAY*tmp_qloop_102; + const real_t tmp_qloop_111 = jac_affine_inv_1_0_GRAY*tmp_qloop_37; + const real_t tmp_qloop_112 = -tmp_qloop_37 - tmp_qloop_97; + const real_t tmp_qloop_113 = jac_affine_inv_1_0_GRAY*tmp_qloop_112 - tmp_qloop_111; + const real_t tmp_qloop_114 = -tmp_qloop_102 - tmp_qloop_41; + const real_t tmp_qloop_115 = jac_affine_inv_1_0_GRAY*tmp_qloop_114 - tmp_qloop_99; + const real_t tmp_qloop_116 = jac_affine_inv_1_1_GRAY*tmp_qloop_112 - tmp_qloop_100; + const real_t tmp_qloop_117 
= jac_affine_inv_1_1_GRAY*tmp_qloop_41; + const real_t tmp_qloop_118 = jac_affine_inv_1_1_GRAY*tmp_qloop_114 - tmp_qloop_117; + const real_t tmp_qloop_123 = jac_affine_inv_0_0_GRAY*-8.0 - tmp_qloop_38; + const real_t tmp_qloop_124 = jac_affine_inv_0_0_GRAY*tmp_qloop_123 - tmp_qloop_111; + const real_t tmp_qloop_125 = jac_affine_inv_0_1_GRAY*-8.0 - tmp_qloop_42; + const real_t tmp_qloop_126 = jac_affine_inv_0_0_GRAY*tmp_qloop_125 - tmp_qloop_100; + const real_t tmp_qloop_127 = jac_affine_inv_0_1_GRAY*tmp_qloop_123 - tmp_qloop_99; + const real_t tmp_qloop_128 = jac_affine_inv_0_1_GRAY*tmp_qloop_125 - tmp_qloop_117; + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + 
macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - 
((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wx_dof_1 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_3 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const 
real_t wy_dof_1 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_3 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + 
p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_47 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_48 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_49 = tmp_qloop_47 + tmp_qloop_48 - 3.0; + const real_t tmp_qloop_50 = jac_affine_inv_0_0_GRAY*tmp_qloop_49 + jac_affine_inv_1_0_GRAY*tmp_qloop_49; + const real_t tmp_qloop_54 = 
jac_affine_inv_0_1_GRAY*tmp_qloop_49 + jac_affine_inv_1_1_GRAY*tmp_qloop_49; + const real_t tmp_qloop_69 = tmp_qloop_47*_data_q_p_1[q]; + const real_t tmp_qloop_70 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_71 = tmp_qloop_70*2.0; + const real_t tmp_qloop_72 = tmp_qloop_71 - _data_q_p_0[q]; + const real_t tmp_qloop_73 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_74 = tmp_qloop_73*2.0; + const real_t tmp_qloop_75 = tmp_qloop_74 - _data_q_p_1[q]; + const real_t tmp_qloop_76 = tmp_qloop_48 - tmp_qloop_69 + tmp_qloop_73*-4.0; + const real_t tmp_qloop_77 = tmp_qloop_47 - tmp_qloop_69 + tmp_qloop_70*-4.0; + const real_t tmp_qloop_78 = tmp_qloop_69 + tmp_qloop_71 + tmp_qloop_74 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_79 = tmp_qloop_69*wx_dof_3 + tmp_qloop_72*wx_dof_1 + tmp_qloop_75*wx_dof_2 + tmp_qloop_76*wx_dof_4 + tmp_qloop_77*wx_dof_5 + tmp_qloop_78*wx_dof_0; + const real_t tmp_qloop_80 = tmp_qloop_69*wy_dof_3 + tmp_qloop_72*wy_dof_1 + tmp_qloop_75*wy_dof_2 + tmp_qloop_76*wy_dof_4 + tmp_qloop_77*wy_dof_5 + tmp_qloop_78*wy_dof_0; + const real_t tmp_qloop_86 = tmp_qloop_47 - 1.0; + const real_t tmp_qloop_87 = jac_affine_inv_0_0_GRAY*tmp_qloop_86; + const real_t tmp_qloop_88 = jac_affine_inv_0_1_GRAY*tmp_qloop_86; + const real_t tmp_qloop_93 = tmp_qloop_48 - 1.0; + const real_t tmp_qloop_94 = jac_affine_inv_1_0_GRAY*tmp_qloop_93; + const real_t tmp_qloop_95 = jac_affine_inv_1_1_GRAY*tmp_qloop_93; + const real_t tmp_qloop_104 = jac_affine_inv_1_0_GRAY*tmp_qloop_47; + const real_t tmp_qloop_105 = jac_affine_inv_0_0_GRAY*tmp_qloop_48; + const real_t tmp_qloop_106 = tmp_qloop_104 + tmp_qloop_105; + const real_t tmp_qloop_107 = jac_affine_inv_1_1_GRAY*tmp_qloop_47; + const real_t tmp_qloop_108 = jac_affine_inv_0_1_GRAY*tmp_qloop_48; + const real_t tmp_qloop_109 = tmp_qloop_107 + tmp_qloop_108; + const real_t tmp_qloop_119 = -tmp_qloop_47 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_120 = 
jac_affine_inv_1_0_GRAY*tmp_qloop_119 - tmp_qloop_105; + const real_t tmp_qloop_121 = jac_affine_inv_1_1_GRAY*tmp_qloop_119 - tmp_qloop_108; + const real_t tmp_qloop_129 = -tmp_qloop_48 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_130 = jac_affine_inv_0_0_GRAY*tmp_qloop_129 - tmp_qloop_104; + const real_t tmp_qloop_131 = jac_affine_inv_0_1_GRAY*tmp_qloop_129 - tmp_qloop_107; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_81 = abs_det_jac_affine_GRAY*abs_det_jac_blending*(diffusivity_times_delta_dof_0*tmp_qloop_78 + diffusivity_times_delta_dof_1*tmp_qloop_72 + diffusivity_times_delta_dof_2*tmp_qloop_75 + diffusivity_times_delta_dof_3*tmp_qloop_69 + diffusivity_times_delta_dof_4*tmp_qloop_76 + diffusivity_times_delta_dof_5*tmp_qloop_77)*_data_q_w[q]; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t tmp_qloop_82 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_50 + jac_blending_inv_1_0*tmp_qloop_54) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_50 + jac_blending_inv_1_1*tmp_qloop_54)); + const real_t tmp_qloop_133 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_87 + jac_blending_inv_1_0*tmp_qloop_88) + 
tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_87 + jac_blending_inv_1_1*tmp_qloop_88)); + const real_t tmp_qloop_134 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_94 + jac_blending_inv_1_0*tmp_qloop_95) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_94 + jac_blending_inv_1_1*tmp_qloop_95)); + const real_t tmp_qloop_135 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_106 + jac_blending_inv_1_0*tmp_qloop_109) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_106 + jac_blending_inv_1_1*tmp_qloop_109)); + const real_t tmp_qloop_136 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_120 + jac_blending_inv_1_0*tmp_qloop_121) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_120 + jac_blending_inv_1_1*tmp_qloop_121)); + const real_t tmp_qloop_137 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_130 + jac_blending_inv_1_0*tmp_qloop_131) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_130 + jac_blending_inv_1_1*tmp_qloop_131)); + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t tmp_qloop_51 = -hessian_blending_0_0_0*jac_blending_inv_0_0 - hessian_blending_0_1_0*jac_blending_inv_1_0; + const real_t tmp_qloop_60 = -hessian_blending_0_0_0*jac_blending_inv_0_1 - hessian_blending_0_1_0*jac_blending_inv_1_1; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t tmp_qloop_56 = 
-hessian_blending_1_0_0*jac_blending_inv_0_0 - hessian_blending_1_1_0*jac_blending_inv_1_0; + const real_t tmp_qloop_64 = -hessian_blending_1_0_0*jac_blending_inv_0_1 - hessian_blending_1_1_0*jac_blending_inv_1_1; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + tmp_qloop_25 + tmp_qloop_36; + const real_t tmp_qloop_52 = -hessian_blending_0_0_1*jac_blending_inv_0_0 - hessian_blending_0_1_1*jac_blending_inv_1_0; + const real_t tmp_qloop_53 = jac_blending_inv_0_0*tmp_qloop_51 + jac_blending_inv_0_1*tmp_qloop_52; + const real_t tmp_qloop_55 = jac_blending_inv_1_0*tmp_qloop_51 + jac_blending_inv_1_1*tmp_qloop_52; + const real_t tmp_qloop_61 = -hessian_blending_0_0_1*jac_blending_inv_0_1 - hessian_blending_0_1_1*jac_blending_inv_1_1; + const real_t tmp_qloop_62 = jac_blending_inv_0_0*tmp_qloop_60 + jac_blending_inv_0_1*tmp_qloop_61; + const real_t tmp_qloop_63 = jac_blending_inv_1_0*tmp_qloop_60 + jac_blending_inv_1_1*tmp_qloop_61; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t tmp_qloop_57 = -hessian_blending_1_0_1*jac_blending_inv_0_0 - hessian_blending_1_1_1*jac_blending_inv_1_0; + const real_t tmp_qloop_58 = jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_0_1*tmp_qloop_57; + const real_t tmp_qloop_59 = jac_blending_inv_1_0*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57; + const real_t tmp_qloop_65 = -hessian_blending_1_0_1*jac_blending_inv_0_1 - hessian_blending_1_1_1*jac_blending_inv_1_1; + const real_t tmp_qloop_66 = jac_blending_inv_0_0*tmp_qloop_64 + jac_blending_inv_0_1*tmp_qloop_65; + const real_t tmp_qloop_67 = jac_blending_inv_1_0*tmp_qloop_64 + jac_blending_inv_1_1*tmp_qloop_65; + const real_t tmp_qloop_68 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_40 + jac_blending_inv_1_0*tmp_qloop_44) + 
jac_blending_inv_0_0*(tmp_qloop_50*tmp_qloop_53 + tmp_qloop_54*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_40 + jac_blending_inv_1_1*tmp_qloop_44) + jac_blending_inv_0_1*(tmp_qloop_50*tmp_qloop_58 + tmp_qloop_54*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_45 + jac_blending_inv_1_0*tmp_qloop_46) + jac_blending_inv_1_0*(tmp_qloop_50*tmp_qloop_62 + tmp_qloop_54*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_45 + jac_blending_inv_1_1*tmp_qloop_46) + jac_blending_inv_1_1*(tmp_qloop_50*tmp_qloop_66 + tmp_qloop_54*tmp_qloop_67); + const real_t tmp_qloop_89 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_84 + jac_blending_inv_1_0*tmp_qloop_83) + jac_blending_inv_0_0*(tmp_qloop_53*tmp_qloop_87 + tmp_qloop_55*tmp_qloop_88) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_84 + jac_blending_inv_1_1*tmp_qloop_83) + jac_blending_inv_0_1*(tmp_qloop_58*tmp_qloop_87 + tmp_qloop_59*tmp_qloop_88) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_83 + jac_blending_inv_1_0*tmp_qloop_85) + jac_blending_inv_1_0*(tmp_qloop_62*tmp_qloop_87 + tmp_qloop_63*tmp_qloop_88) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_83 + jac_blending_inv_1_1*tmp_qloop_85) + jac_blending_inv_1_1*(tmp_qloop_66*tmp_qloop_87 + tmp_qloop_67*tmp_qloop_88); + const real_t tmp_qloop_96 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_91 + jac_blending_inv_1_0*tmp_qloop_90) + jac_blending_inv_0_0*(tmp_qloop_53*tmp_qloop_94 + tmp_qloop_55*tmp_qloop_95) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_91 + jac_blending_inv_1_1*tmp_qloop_90) + jac_blending_inv_0_1*(tmp_qloop_58*tmp_qloop_94 + tmp_qloop_59*tmp_qloop_95) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_90 + jac_blending_inv_1_0*tmp_qloop_92) + jac_blending_inv_1_0*(tmp_qloop_62*tmp_qloop_94 + tmp_qloop_63*tmp_qloop_95) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_90 + jac_blending_inv_1_1*tmp_qloop_92) + 
jac_blending_inv_1_1*(tmp_qloop_66*tmp_qloop_94 + tmp_qloop_67*tmp_qloop_95); + const real_t tmp_qloop_110 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_98 + jac_blending_inv_1_0*tmp_qloop_101) + jac_blending_inv_0_0*(tmp_qloop_106*tmp_qloop_53 + tmp_qloop_109*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_98 + jac_blending_inv_1_1*tmp_qloop_101) + jac_blending_inv_0_1*(tmp_qloop_106*tmp_qloop_58 + tmp_qloop_109*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_101 + jac_blending_inv_1_0*tmp_qloop_103) + jac_blending_inv_1_0*(tmp_qloop_106*tmp_qloop_62 + tmp_qloop_109*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_101 + jac_blending_inv_1_1*tmp_qloop_103) + jac_blending_inv_1_1*(tmp_qloop_106*tmp_qloop_66 + tmp_qloop_109*tmp_qloop_67); + const real_t tmp_qloop_122 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_113 + jac_blending_inv_1_0*tmp_qloop_115) + jac_blending_inv_0_0*(tmp_qloop_120*tmp_qloop_53 + tmp_qloop_121*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_113 + jac_blending_inv_1_1*tmp_qloop_115) + jac_blending_inv_0_1*(tmp_qloop_120*tmp_qloop_58 + tmp_qloop_121*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_116 + jac_blending_inv_1_0*tmp_qloop_118) + jac_blending_inv_1_0*(tmp_qloop_120*tmp_qloop_62 + tmp_qloop_121*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_116 + jac_blending_inv_1_1*tmp_qloop_118) + jac_blending_inv_1_1*(tmp_qloop_120*tmp_qloop_66 + tmp_qloop_121*tmp_qloop_67); + const real_t tmp_qloop_132 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_qloop_124 + jac_blending_inv_1_0*tmp_qloop_126) + jac_blending_inv_0_0*(tmp_qloop_130*tmp_qloop_53 + tmp_qloop_131*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_qloop_124 + jac_blending_inv_1_1*tmp_qloop_126) + jac_blending_inv_0_1*(tmp_qloop_130*tmp_qloop_58 + tmp_qloop_131*tmp_qloop_59) + 
jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_qloop_127 + jac_blending_inv_1_0*tmp_qloop_128) + jac_blending_inv_1_0*(tmp_qloop_130*tmp_qloop_62 + tmp_qloop_131*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_qloop_127 + jac_blending_inv_1_1*tmp_qloop_128) + jac_blending_inv_1_1*(tmp_qloop_130*tmp_qloop_66 + tmp_qloop_131*tmp_qloop_67); + const real_t q_tmp_0_0 = tmp_qloop_68*tmp_qloop_82; + const real_t q_tmp_0_1 = tmp_qloop_82*tmp_qloop_89; + const real_t q_tmp_0_2 = tmp_qloop_82*tmp_qloop_96; + const real_t q_tmp_0_3 = tmp_qloop_110*tmp_qloop_82; + const real_t q_tmp_0_4 = tmp_qloop_122*tmp_qloop_82; + const real_t q_tmp_0_5 = tmp_qloop_132*tmp_qloop_82; + const real_t q_tmp_1_0 = tmp_qloop_133*tmp_qloop_68; + const real_t q_tmp_1_1 = tmp_qloop_133*tmp_qloop_89; + const real_t q_tmp_1_2 = tmp_qloop_133*tmp_qloop_96; + const real_t q_tmp_1_3 = tmp_qloop_110*tmp_qloop_133; + const real_t q_tmp_1_4 = tmp_qloop_122*tmp_qloop_133; + const real_t q_tmp_1_5 = tmp_qloop_132*tmp_qloop_133; + const real_t q_tmp_2_0 = tmp_qloop_134*tmp_qloop_68; + const real_t q_tmp_2_1 = tmp_qloop_134*tmp_qloop_89; + const real_t q_tmp_2_2 = tmp_qloop_134*tmp_qloop_96; + const real_t q_tmp_2_3 = tmp_qloop_110*tmp_qloop_134; + const real_t q_tmp_2_4 = tmp_qloop_122*tmp_qloop_134; + const real_t q_tmp_2_5 = tmp_qloop_132*tmp_qloop_134; + const real_t q_tmp_3_0 = tmp_qloop_135*tmp_qloop_68; + const real_t q_tmp_3_1 = tmp_qloop_135*tmp_qloop_89; + const real_t q_tmp_3_2 = tmp_qloop_135*tmp_qloop_96; + const real_t q_tmp_3_3 = tmp_qloop_110*tmp_qloop_135; + const real_t q_tmp_3_4 = tmp_qloop_122*tmp_qloop_135; + const real_t q_tmp_3_5 = tmp_qloop_132*tmp_qloop_135; + const real_t q_tmp_4_0 = tmp_qloop_136*tmp_qloop_68; + const real_t q_tmp_4_1 = tmp_qloop_136*tmp_qloop_89; + const real_t q_tmp_4_2 = tmp_qloop_136*tmp_qloop_96; + const real_t q_tmp_4_3 = tmp_qloop_110*tmp_qloop_136; + const real_t q_tmp_4_4 = tmp_qloop_122*tmp_qloop_136; + const real_t q_tmp_4_5 = 
tmp_qloop_132*tmp_qloop_136; + const real_t q_tmp_5_0 = tmp_qloop_137*tmp_qloop_68; + const real_t q_tmp_5_1 = tmp_qloop_137*tmp_qloop_89; + const real_t q_tmp_5_2 = tmp_qloop_137*tmp_qloop_96; + const real_t q_tmp_5_3 = tmp_qloop_110*tmp_qloop_137; + const real_t q_tmp_5_4 = tmp_qloop_122*tmp_qloop_137; + const real_t q_tmp_5_5 = tmp_qloop_132*tmp_qloop_137; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMat_0_0 = q_acc_0_0; + const real_t elMat_0_1 = q_acc_0_1; + const real_t elMat_0_2 = q_acc_0_2; + const real_t elMat_0_3 = q_acc_0_3; + const real_t elMat_0_4 = q_acc_0_4; + const real_t elMat_0_5 = q_acc_0_5; + const real_t elMat_1_0 = q_acc_1_0; + const real_t elMat_1_1 = 
q_acc_1_1; + const real_t elMat_1_2 = q_acc_1_2; + const real_t elMat_1_3 = q_acc_1_3; + const real_t elMat_1_4 = q_acc_1_4; + const real_t elMat_1_5 = q_acc_1_5; + const real_t elMat_2_0 = q_acc_2_0; + const real_t elMat_2_1 = q_acc_2_1; + const real_t elMat_2_2 = q_acc_2_2; + const real_t elMat_2_3 = q_acc_2_3; + const real_t elMat_2_4 = q_acc_2_4; + const real_t elMat_2_5 = q_acc_2_5; + const real_t elMat_3_0 = q_acc_3_0; + const real_t elMat_3_1 = q_acc_3_1; + const real_t elMat_3_2 = q_acc_3_2; + const real_t elMat_3_3 = q_acc_3_3; + const real_t elMat_3_4 = q_acc_3_4; + const real_t elMat_3_5 = q_acc_3_5; + const real_t elMat_4_0 = q_acc_4_0; + const real_t elMat_4_1 = q_acc_4_1; + const real_t elMat_4_2 = q_acc_4_2; + const real_t elMat_4_3 = q_acc_4_3; + const real_t elMat_4_4 = q_acc_4_4; + const real_t elMat_4_5 = q_acc_4_5; + const real_t elMat_5_0 = q_acc_5_0; + const real_t elMat_5_1 = q_acc_5_1; + const real_t elMat_5_2 = q_acc_5_2; + const real_t elMat_5_3 = q_acc_5_3; + const real_t elMat_5_4 = q_acc_5_4; + const real_t elMat_5_5 = q_acc_5_5; + + std::vector< uint_t > _data_rowIdx( 6 ); + std::vector< uint_t > _data_colIdx( 6 ); + std::vector< real_t > _data_mat( 36 ); + + _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])); + _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])); + _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / 
(2))])); + _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])); + _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])); + _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])); + _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])); + + /* Apply basis transformation */ + + + + _data_mat[0] = ((real_t)(elMat_0_0)); + _data_mat[1] = ((real_t)(elMat_0_1)); + _data_mat[2] = ((real_t)(elMat_0_2)); + _data_mat[3] = ((real_t)(elMat_0_3)); + _data_mat[4] = ((real_t)(elMat_0_4)); + _data_mat[5] = ((real_t)(elMat_0_5)); + _data_mat[6] = ((real_t)(elMat_1_0)); + _data_mat[7] = ((real_t)(elMat_1_1)); + _data_mat[8] = ((real_t)(elMat_1_2)); + _data_mat[9] = ((real_t)(elMat_1_3)); + _data_mat[10] = ((real_t)(elMat_1_4)); + _data_mat[11] = ((real_t)(elMat_1_5)); + _data_mat[12] = ((real_t)(elMat_2_0)); + _data_mat[13] = ((real_t)(elMat_2_1)); + _data_mat[14] = ((real_t)(elMat_2_2)); + _data_mat[15] = ((real_t)(elMat_2_3)); + _data_mat[16] = ((real_t)(elMat_2_4)); + _data_mat[17] = ((real_t)(elMat_2_5)); + _data_mat[18] = ((real_t)(elMat_3_0)); + _data_mat[19] = ((real_t)(elMat_3_1)); + _data_mat[20] = ((real_t)(elMat_3_2)); + _data_mat[21] = 
((real_t)(elMat_3_3)); + _data_mat[22] = ((real_t)(elMat_3_4)); + _data_mat[23] = ((real_t)(elMat_3_5)); + _data_mat[24] = ((real_t)(elMat_4_0)); + _data_mat[25] = ((real_t)(elMat_4_1)); + _data_mat[26] = ((real_t)(elMat_4_2)); + _data_mat[27] = ((real_t)(elMat_4_3)); + _data_mat[28] = ((real_t)(elMat_4_4)); + _data_mat[29] = ((real_t)(elMat_4_5)); + _data_mat[30] = ((real_t)(elMat_5_0)); + _data_mat[31] = ((real_t)(elMat_5_1)); + _data_mat[32] = ((real_t)(elMat_5_2)); + _data_mat[33] = ((real_t)(elMat_5_3)); + _data_mat[34] = ((real_t)(elMat_5_4)); + _data_mat[35] = ((real_t)(elMat_5_5)); + + + mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const 
real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + const real_t tmp_moved_constant_0 = jac_affine_inv_0_0_BLUE*4.0; + const real_t tmp_moved_constant_1 = jac_affine_inv_1_0_BLUE*4.0; + const real_t tmp_moved_constant_2 = tmp_moved_constant_0 + tmp_moved_constant_1; + const real_t tmp_moved_constant_3 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_2; + const real_t tmp_moved_constant_4 = jac_affine_inv_0_1_BLUE*4.0; + const real_t tmp_moved_constant_5 = jac_affine_inv_1_1_BLUE*4.0; + const real_t tmp_moved_constant_6 = tmp_moved_constant_4 + tmp_moved_constant_5; + const real_t tmp_moved_constant_7 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_6 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_6; + const real_t tmp_moved_constant_8 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_2; + const real_t tmp_moved_constant_9 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_6 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_6; + const real_t tmp_moved_constant_10 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_0; + const real_t tmp_moved_constant_11 = (jac_affine_inv_0_0_BLUE*jac_affine_inv_0_0_BLUE)*4.0; + const real_t tmp_moved_constant_12 = (jac_affine_inv_0_1_BLUE*jac_affine_inv_0_1_BLUE)*4.0; 
+ const real_t tmp_moved_constant_13 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_1; + const real_t tmp_moved_constant_14 = (jac_affine_inv_1_0_BLUE*jac_affine_inv_1_0_BLUE)*4.0; + const real_t tmp_moved_constant_15 = (jac_affine_inv_1_1_BLUE*jac_affine_inv_1_1_BLUE)*4.0; + const real_t tmp_moved_constant_16 = jac_affine_inv_1_0_BLUE*8.0; + const real_t tmp_moved_constant_17 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_16; + const real_t tmp_moved_constant_18 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_0; + const real_t tmp_moved_constant_19 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_1; + const real_t tmp_moved_constant_20 = tmp_moved_constant_18 + tmp_moved_constant_19; + const real_t tmp_moved_constant_21 = jac_affine_inv_1_1_BLUE*8.0; + const real_t tmp_moved_constant_22 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_21; + const real_t tmp_moved_constant_23 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_0; + const real_t tmp_moved_constant_24 = -tmp_moved_constant_0 - tmp_moved_constant_16; + const real_t tmp_moved_constant_25 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_24 - tmp_moved_constant_23; + const real_t tmp_moved_constant_26 = -tmp_moved_constant_21 - tmp_moved_constant_4; + const real_t tmp_moved_constant_27 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_26 - tmp_moved_constant_18; + const real_t tmp_moved_constant_28 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_24 - tmp_moved_constant_19; + const real_t tmp_moved_constant_29 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_4; + const real_t tmp_moved_constant_30 = jac_affine_inv_1_1_BLUE*tmp_moved_constant_26 - tmp_moved_constant_29; + const real_t tmp_moved_constant_31 = jac_affine_inv_0_0_BLUE*-8.0 - tmp_moved_constant_1; + const real_t tmp_moved_constant_32 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_31 - tmp_moved_constant_23; + const real_t tmp_moved_constant_33 = jac_affine_inv_0_1_BLUE*-8.0 - tmp_moved_constant_5; + const real_t tmp_moved_constant_34 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_33 - 
tmp_moved_constant_19; + const real_t tmp_moved_constant_35 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_31 - tmp_moved_constant_18; + const real_t tmp_moved_constant_36 = jac_affine_inv_0_1_BLUE*tmp_moved_constant_33 - tmp_moved_constant_29; + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + 
macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; 
+ const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wx_dof_1 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t wx_dof_3 = _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wy_dof_1 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t wy_dof_3 = _data_wyEdge[ctr_0 + (ctr_1 + 
1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = p_affine_0_0 + (-p_affine_0_0 + p_affine_1_0)*_data_q_p_0[q] + (-p_affine_0_0 + p_affine_2_0)*_data_q_p_1[q]; + const real_t tmp_qloop_2 = (tmp_qloop_0*tmp_qloop_0); + const real_t tmp_qloop_3 = p_affine_0_1 + (-p_affine_0_1 + p_affine_1_1)*_data_q_p_0[q] + (-p_affine_0_1 + p_affine_2_1)*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (tmp_qloop_3*tmp_qloop_3); + const real_t tmp_qloop_5 = tmp_qloop_2 + tmp_qloop_4; + const real_t tmp_qloop_6 = pow(tmp_qloop_5, -0.50000000000000000); + const real_t tmp_qloop_13 = 
tmp_qloop_12*tmp_qloop_6; + const real_t tmp_qloop_14 = tmp_qloop_1*tmp_qloop_13; + const real_t tmp_qloop_15 = pow(tmp_qloop_5, -1.5000000000000000); + const real_t tmp_qloop_16 = radRayVertex + tmp_qloop_11*(tmp_qloop_1*(-rayVertex_0 + tmp_qloop_0) - tmp_qloop_8*(-rayVertex_1 + tmp_qloop_3)); + const real_t tmp_qloop_17 = tmp_qloop_15*tmp_qloop_16; + const real_t tmp_qloop_18 = tmp_qloop_17*1.0; + const real_t tmp_qloop_19 = tmp_qloop_13*tmp_qloop_8; + const real_t tmp_qloop_20 = tmp_qloop_0*tmp_qloop_18; + const real_t tmp_qloop_23 = tmp_qloop_12*tmp_qloop_15; + const real_t tmp_qloop_24 = tmp_qloop_1*tmp_qloop_23; + const real_t tmp_qloop_25 = tmp_qloop_2*tmp_qloop_24; + const real_t tmp_qloop_26 = tmp_qloop_16*pow(tmp_qloop_5, -2.5000000000000000)*3.0; + const real_t tmp_qloop_27 = tmp_qloop_0*tmp_qloop_26*tmp_qloop_4; + const real_t tmp_qloop_28 = tmp_qloop_24*tmp_qloop_4 - tmp_qloop_27; + const real_t tmp_qloop_29 = tmp_qloop_23*tmp_qloop_7; + const real_t tmp_qloop_30 = tmp_qloop_0*tmp_qloop_3; + const real_t tmp_qloop_31 = tmp_qloop_24*tmp_qloop_30; + const real_t tmp_qloop_32 = tmp_qloop_17*2.0; + const real_t tmp_qloop_33 = tmp_qloop_2*tmp_qloop_26*tmp_qloop_3; + const real_t tmp_qloop_34 = tmp_qloop_18*tmp_qloop_3 - tmp_qloop_33; + const real_t tmp_qloop_35 = tmp_qloop_29*tmp_qloop_30; + const real_t tmp_qloop_36 = tmp_qloop_23*tmp_qloop_30*tmp_qloop_8; + const real_t tmp_qloop_47 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_48 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_49 = tmp_qloop_47 + tmp_qloop_48 - 3.0; + const real_t tmp_qloop_50 = jac_affine_inv_0_0_BLUE*tmp_qloop_49 + jac_affine_inv_1_0_BLUE*tmp_qloop_49; + const real_t tmp_qloop_54 = jac_affine_inv_0_1_BLUE*tmp_qloop_49 + jac_affine_inv_1_1_BLUE*tmp_qloop_49; + const real_t tmp_qloop_69 = tmp_qloop_47*_data_q_p_1[q]; + const real_t tmp_qloop_70 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_71 = tmp_qloop_70*2.0; + const real_t tmp_qloop_72 = tmp_qloop_71 - _data_q_p_0[q]; + 
const real_t tmp_qloop_73 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_74 = tmp_qloop_73*2.0; + const real_t tmp_qloop_75 = tmp_qloop_74 - _data_q_p_1[q]; + const real_t tmp_qloop_76 = tmp_qloop_48 - tmp_qloop_69 + tmp_qloop_73*-4.0; + const real_t tmp_qloop_77 = tmp_qloop_47 - tmp_qloop_69 + tmp_qloop_70*-4.0; + const real_t tmp_qloop_78 = tmp_qloop_69 + tmp_qloop_71 + tmp_qloop_74 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_79 = tmp_qloop_69*wx_dof_3 + tmp_qloop_72*wx_dof_1 + tmp_qloop_75*wx_dof_2 + tmp_qloop_76*wx_dof_4 + tmp_qloop_77*wx_dof_5 + tmp_qloop_78*wx_dof_0; + const real_t tmp_qloop_80 = tmp_qloop_69*wy_dof_3 + tmp_qloop_72*wy_dof_1 + tmp_qloop_75*wy_dof_2 + tmp_qloop_76*wy_dof_4 + tmp_qloop_77*wy_dof_5 + tmp_qloop_78*wy_dof_0; + const real_t tmp_qloop_86 = tmp_qloop_47 - 1.0; + const real_t tmp_qloop_87 = jac_affine_inv_0_0_BLUE*tmp_qloop_86; + const real_t tmp_qloop_88 = jac_affine_inv_0_1_BLUE*tmp_qloop_86; + const real_t tmp_qloop_93 = tmp_qloop_48 - 1.0; + const real_t tmp_qloop_94 = jac_affine_inv_1_0_BLUE*tmp_qloop_93; + const real_t tmp_qloop_95 = jac_affine_inv_1_1_BLUE*tmp_qloop_93; + const real_t tmp_qloop_104 = jac_affine_inv_1_0_BLUE*tmp_qloop_47; + const real_t tmp_qloop_105 = jac_affine_inv_0_0_BLUE*tmp_qloop_48; + const real_t tmp_qloop_106 = tmp_qloop_104 + tmp_qloop_105; + const real_t tmp_qloop_107 = jac_affine_inv_1_1_BLUE*tmp_qloop_47; + const real_t tmp_qloop_108 = jac_affine_inv_0_1_BLUE*tmp_qloop_48; + const real_t tmp_qloop_109 = tmp_qloop_107 + tmp_qloop_108; + const real_t tmp_qloop_119 = -tmp_qloop_47 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_120 = jac_affine_inv_1_0_BLUE*tmp_qloop_119 - tmp_qloop_105; + const real_t tmp_qloop_121 = jac_affine_inv_1_1_BLUE*tmp_qloop_119 - tmp_qloop_108; + const real_t tmp_qloop_129 = -tmp_qloop_48 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_130 = jac_affine_inv_0_0_BLUE*tmp_qloop_129 - tmp_qloop_104; + const real_t 
tmp_qloop_131 = jac_affine_inv_0_1_BLUE*tmp_qloop_129 - tmp_qloop_107; + const real_t jac_blending_0_0 = tmp_qloop_0*tmp_qloop_14 + tmp_qloop_18*tmp_qloop_4; + const real_t jac_blending_0_1 = -tmp_qloop_0*tmp_qloop_15*tmp_qloop_16*tmp_qloop_3 - tmp_qloop_0*tmp_qloop_19; + const real_t jac_blending_1_0 = tmp_qloop_14*tmp_qloop_3 - tmp_qloop_20*tmp_qloop_3; + const real_t jac_blending_1_1 = tmp_qloop_15*tmp_qloop_16*tmp_qloop_2*1.0 - tmp_qloop_19*tmp_qloop_3; + const real_t tmp_qloop_21 = jac_blending_0_0*jac_blending_1_1 - jac_blending_0_1*jac_blending_1_0; + const real_t tmp_qloop_22 = 1.0 / (tmp_qloop_21); + const real_t abs_det_jac_blending = tmp_qloop_21; + const real_t tmp_qloop_81 = abs_det_jac_affine_BLUE*abs_det_jac_blending*(diffusivity_times_delta_dof_0*tmp_qloop_78 + diffusivity_times_delta_dof_1*tmp_qloop_72 + diffusivity_times_delta_dof_2*tmp_qloop_75 + diffusivity_times_delta_dof_3*tmp_qloop_69 + diffusivity_times_delta_dof_4*tmp_qloop_76 + diffusivity_times_delta_dof_5*tmp_qloop_77)*_data_q_w[q]; + const real_t jac_blending_inv_0_0 = jac_blending_1_1*tmp_qloop_22; + const real_t jac_blending_inv_0_1 = -jac_blending_0_1*tmp_qloop_22; + const real_t jac_blending_inv_1_0 = -jac_blending_1_0*tmp_qloop_22; + const real_t jac_blending_inv_1_1 = jac_blending_0_0*tmp_qloop_22; + const real_t tmp_qloop_82 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_50 + jac_blending_inv_1_0*tmp_qloop_54) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_50 + jac_blending_inv_1_1*tmp_qloop_54)); + const real_t tmp_qloop_133 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_87 + jac_blending_inv_1_0*tmp_qloop_88) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_87 + jac_blending_inv_1_1*tmp_qloop_88)); + const real_t tmp_qloop_134 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_94 + jac_blending_inv_1_0*tmp_qloop_95) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_94 + jac_blending_inv_1_1*tmp_qloop_95)); + const real_t tmp_qloop_135 = 
tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_106 + jac_blending_inv_1_0*tmp_qloop_109) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_106 + jac_blending_inv_1_1*tmp_qloop_109)); + const real_t tmp_qloop_136 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_120 + jac_blending_inv_1_0*tmp_qloop_121) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_120 + jac_blending_inv_1_1*tmp_qloop_121)); + const real_t tmp_qloop_137 = tmp_qloop_81*(tmp_qloop_79*(jac_blending_inv_0_0*tmp_qloop_130 + jac_blending_inv_1_0*tmp_qloop_131) + tmp_qloop_80*(jac_blending_inv_0_1*tmp_qloop_130 + jac_blending_inv_1_1*tmp_qloop_131)); + const real_t hessian_blending_0_0_0 = tmp_qloop_14 - tmp_qloop_25 + tmp_qloop_28; + const real_t hessian_blending_1_0_0 = -tmp_qloop_26*(tmp_qloop_3*tmp_qloop_3*tmp_qloop_3) + tmp_qloop_29*tmp_qloop_4 + tmp_qloop_3*tmp_qloop_32 - tmp_qloop_31; + const real_t hessian_blending_0_0_1 = tmp_qloop_1*tmp_qloop_11*tmp_qloop_15*tmp_qloop_30*-2.0 - tmp_qloop_34; + const real_t hessian_blending_1_0_1 = tmp_qloop_1*tmp_qloop_10*tmp_qloop_6*tmp_qloop_9*1.0 - tmp_qloop_20 - tmp_qloop_28 - tmp_qloop_35; + const real_t hessian_blending_0_1_0 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_31 - tmp_qloop_34; + const real_t tmp_qloop_51 = -hessian_blending_0_0_0*jac_blending_inv_0_0 - hessian_blending_0_1_0*jac_blending_inv_1_0; + const real_t tmp_qloop_60 = -hessian_blending_0_0_0*jac_blending_inv_0_1 - hessian_blending_0_1_0*jac_blending_inv_1_1; + const real_t hessian_blending_1_1_0 = -tmp_qloop_20 + tmp_qloop_27 - tmp_qloop_35 + tmp_qloop_36; + const real_t tmp_qloop_56 = -hessian_blending_1_0_0*jac_blending_inv_0_0 - hessian_blending_1_1_0*jac_blending_inv_1_0; + const real_t tmp_qloop_64 = -hessian_blending_1_0_0*jac_blending_inv_0_1 - hessian_blending_1_1_0*jac_blending_inv_1_1; + const real_t hessian_blending_0_1_1 = tmp_qloop_0*tmp_qloop_32 - (tmp_qloop_0*tmp_qloop_0*tmp_qloop_0)*tmp_qloop_26 + 
tmp_qloop_25 + tmp_qloop_36; + const real_t tmp_qloop_52 = -hessian_blending_0_0_1*jac_blending_inv_0_0 - hessian_blending_0_1_1*jac_blending_inv_1_0; + const real_t tmp_qloop_53 = jac_blending_inv_0_0*tmp_qloop_51 + jac_blending_inv_0_1*tmp_qloop_52; + const real_t tmp_qloop_55 = jac_blending_inv_1_0*tmp_qloop_51 + jac_blending_inv_1_1*tmp_qloop_52; + const real_t tmp_qloop_61 = -hessian_blending_0_0_1*jac_blending_inv_0_1 - hessian_blending_0_1_1*jac_blending_inv_1_1; + const real_t tmp_qloop_62 = jac_blending_inv_0_0*tmp_qloop_60 + jac_blending_inv_0_1*tmp_qloop_61; + const real_t tmp_qloop_63 = jac_blending_inv_1_0*tmp_qloop_60 + jac_blending_inv_1_1*tmp_qloop_61; + const real_t hessian_blending_1_1_1 = tmp_qloop_10*tmp_qloop_15*tmp_qloop_2*tmp_qloop_7*tmp_qloop_9*1.0 + tmp_qloop_10*tmp_qloop_15*tmp_qloop_4*tmp_qloop_8*tmp_qloop_9*1.0 - tmp_qloop_19 - tmp_qloop_33; + const real_t tmp_qloop_57 = -hessian_blending_1_0_1*jac_blending_inv_0_0 - hessian_blending_1_1_1*jac_blending_inv_1_0; + const real_t tmp_qloop_58 = jac_blending_inv_0_0*tmp_qloop_56 + jac_blending_inv_0_1*tmp_qloop_57; + const real_t tmp_qloop_59 = jac_blending_inv_1_0*tmp_qloop_56 + jac_blending_inv_1_1*tmp_qloop_57; + const real_t tmp_qloop_65 = -hessian_blending_1_0_1*jac_blending_inv_0_1 - hessian_blending_1_1_1*jac_blending_inv_1_1; + const real_t tmp_qloop_66 = jac_blending_inv_0_0*tmp_qloop_64 + jac_blending_inv_0_1*tmp_qloop_65; + const real_t tmp_qloop_67 = jac_blending_inv_1_0*tmp_qloop_64 + jac_blending_inv_1_1*tmp_qloop_65; + const real_t tmp_qloop_68 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_3 + jac_blending_inv_1_0*tmp_moved_constant_7) + jac_blending_inv_0_0*(tmp_qloop_50*tmp_qloop_53 + tmp_qloop_54*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_3 + jac_blending_inv_1_1*tmp_moved_constant_7) + jac_blending_inv_0_1*(tmp_qloop_50*tmp_qloop_58 + tmp_qloop_54*tmp_qloop_59) + 
jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_8 + jac_blending_inv_1_0*tmp_moved_constant_9) + jac_blending_inv_1_0*(tmp_qloop_50*tmp_qloop_62 + tmp_qloop_54*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_8 + jac_blending_inv_1_1*tmp_moved_constant_9) + jac_blending_inv_1_1*(tmp_qloop_50*tmp_qloop_66 + tmp_qloop_54*tmp_qloop_67); + const real_t tmp_qloop_89 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_11 + jac_blending_inv_1_0*tmp_moved_constant_10) + jac_blending_inv_0_0*(tmp_qloop_53*tmp_qloop_87 + tmp_qloop_55*tmp_qloop_88) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_11 + jac_blending_inv_1_1*tmp_moved_constant_10) + jac_blending_inv_0_1*(tmp_qloop_58*tmp_qloop_87 + tmp_qloop_59*tmp_qloop_88) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_10 + jac_blending_inv_1_0*tmp_moved_constant_12) + jac_blending_inv_1_0*(tmp_qloop_62*tmp_qloop_87 + tmp_qloop_63*tmp_qloop_88) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_10 + jac_blending_inv_1_1*tmp_moved_constant_12) + jac_blending_inv_1_1*(tmp_qloop_66*tmp_qloop_87 + tmp_qloop_67*tmp_qloop_88); + const real_t tmp_qloop_96 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_14 + jac_blending_inv_1_0*tmp_moved_constant_13) + jac_blending_inv_0_0*(tmp_qloop_53*tmp_qloop_94 + tmp_qloop_55*tmp_qloop_95) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_14 + jac_blending_inv_1_1*tmp_moved_constant_13) + jac_blending_inv_0_1*(tmp_qloop_58*tmp_qloop_94 + tmp_qloop_59*tmp_qloop_95) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_13 + jac_blending_inv_1_0*tmp_moved_constant_15) + jac_blending_inv_1_0*(tmp_qloop_62*tmp_qloop_94 + tmp_qloop_63*tmp_qloop_95) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_13 + jac_blending_inv_1_1*tmp_moved_constant_15) + jac_blending_inv_1_1*(tmp_qloop_66*tmp_qloop_94 + tmp_qloop_67*tmp_qloop_95); + const real_t 
tmp_qloop_110 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_17 + jac_blending_inv_1_0*tmp_moved_constant_20) + jac_blending_inv_0_0*(tmp_qloop_106*tmp_qloop_53 + tmp_qloop_109*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_17 + jac_blending_inv_1_1*tmp_moved_constant_20) + jac_blending_inv_0_1*(tmp_qloop_106*tmp_qloop_58 + tmp_qloop_109*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_20 + jac_blending_inv_1_0*tmp_moved_constant_22) + jac_blending_inv_1_0*(tmp_qloop_106*tmp_qloop_62 + tmp_qloop_109*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_20 + jac_blending_inv_1_1*tmp_moved_constant_22) + jac_blending_inv_1_1*(tmp_qloop_106*tmp_qloop_66 + tmp_qloop_109*tmp_qloop_67); + const real_t tmp_qloop_122 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_25 + jac_blending_inv_1_0*tmp_moved_constant_27) + jac_blending_inv_0_0*(tmp_qloop_120*tmp_qloop_53 + tmp_qloop_121*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_25 + jac_blending_inv_1_1*tmp_moved_constant_27) + jac_blending_inv_0_1*(tmp_qloop_120*tmp_qloop_58 + tmp_qloop_121*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_28 + jac_blending_inv_1_0*tmp_moved_constant_30) + jac_blending_inv_1_0*(tmp_qloop_120*tmp_qloop_62 + tmp_qloop_121*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_28 + jac_blending_inv_1_1*tmp_moved_constant_30) + jac_blending_inv_1_1*(tmp_qloop_120*tmp_qloop_66 + tmp_qloop_121*tmp_qloop_67); + const real_t tmp_qloop_132 = jac_blending_inv_0_0*(jac_blending_inv_0_0*tmp_moved_constant_32 + jac_blending_inv_1_0*tmp_moved_constant_34) + jac_blending_inv_0_0*(tmp_qloop_130*tmp_qloop_53 + tmp_qloop_131*tmp_qloop_55) + jac_blending_inv_0_1*(jac_blending_inv_0_1*tmp_moved_constant_32 + jac_blending_inv_1_1*tmp_moved_constant_34) + jac_blending_inv_0_1*(tmp_qloop_130*tmp_qloop_58 + 
tmp_qloop_131*tmp_qloop_59) + jac_blending_inv_1_0*(jac_blending_inv_0_0*tmp_moved_constant_35 + jac_blending_inv_1_0*tmp_moved_constant_36) + jac_blending_inv_1_0*(tmp_qloop_130*tmp_qloop_62 + tmp_qloop_131*tmp_qloop_63) + jac_blending_inv_1_1*(jac_blending_inv_0_1*tmp_moved_constant_35 + jac_blending_inv_1_1*tmp_moved_constant_36) + jac_blending_inv_1_1*(tmp_qloop_130*tmp_qloop_66 + tmp_qloop_131*tmp_qloop_67); + const real_t q_tmp_0_0 = tmp_qloop_68*tmp_qloop_82; + const real_t q_tmp_0_1 = tmp_qloop_82*tmp_qloop_89; + const real_t q_tmp_0_2 = tmp_qloop_82*tmp_qloop_96; + const real_t q_tmp_0_3 = tmp_qloop_110*tmp_qloop_82; + const real_t q_tmp_0_4 = tmp_qloop_122*tmp_qloop_82; + const real_t q_tmp_0_5 = tmp_qloop_132*tmp_qloop_82; + const real_t q_tmp_1_0 = tmp_qloop_133*tmp_qloop_68; + const real_t q_tmp_1_1 = tmp_qloop_133*tmp_qloop_89; + const real_t q_tmp_1_2 = tmp_qloop_133*tmp_qloop_96; + const real_t q_tmp_1_3 = tmp_qloop_110*tmp_qloop_133; + const real_t q_tmp_1_4 = tmp_qloop_122*tmp_qloop_133; + const real_t q_tmp_1_5 = tmp_qloop_132*tmp_qloop_133; + const real_t q_tmp_2_0 = tmp_qloop_134*tmp_qloop_68; + const real_t q_tmp_2_1 = tmp_qloop_134*tmp_qloop_89; + const real_t q_tmp_2_2 = tmp_qloop_134*tmp_qloop_96; + const real_t q_tmp_2_3 = tmp_qloop_110*tmp_qloop_134; + const real_t q_tmp_2_4 = tmp_qloop_122*tmp_qloop_134; + const real_t q_tmp_2_5 = tmp_qloop_132*tmp_qloop_134; + const real_t q_tmp_3_0 = tmp_qloop_135*tmp_qloop_68; + const real_t q_tmp_3_1 = tmp_qloop_135*tmp_qloop_89; + const real_t q_tmp_3_2 = tmp_qloop_135*tmp_qloop_96; + const real_t q_tmp_3_3 = tmp_qloop_110*tmp_qloop_135; + const real_t q_tmp_3_4 = tmp_qloop_122*tmp_qloop_135; + const real_t q_tmp_3_5 = tmp_qloop_132*tmp_qloop_135; + const real_t q_tmp_4_0 = tmp_qloop_136*tmp_qloop_68; + const real_t q_tmp_4_1 = tmp_qloop_136*tmp_qloop_89; + const real_t q_tmp_4_2 = tmp_qloop_136*tmp_qloop_96; + const real_t q_tmp_4_3 = tmp_qloop_110*tmp_qloop_136; + const real_t q_tmp_4_4 = 
tmp_qloop_122*tmp_qloop_136; + const real_t q_tmp_4_5 = tmp_qloop_132*tmp_qloop_136; + const real_t q_tmp_5_0 = tmp_qloop_137*tmp_qloop_68; + const real_t q_tmp_5_1 = tmp_qloop_137*tmp_qloop_89; + const real_t q_tmp_5_2 = tmp_qloop_137*tmp_qloop_96; + const real_t q_tmp_5_3 = tmp_qloop_110*tmp_qloop_137; + const real_t q_tmp_5_4 = tmp_qloop_122*tmp_qloop_137; + const real_t q_tmp_5_5 = tmp_qloop_132*tmp_qloop_137; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMat_0_0 = q_acc_0_0; + const real_t elMat_0_1 = q_acc_0_1; + const real_t elMat_0_2 = q_acc_0_2; + const real_t elMat_0_3 = q_acc_0_3; + const real_t elMat_0_4 = q_acc_0_4; + const real_t elMat_0_5 = q_acc_0_5; + const real_t 
elMat_1_0 = q_acc_1_0; + const real_t elMat_1_1 = q_acc_1_1; + const real_t elMat_1_2 = q_acc_1_2; + const real_t elMat_1_3 = q_acc_1_3; + const real_t elMat_1_4 = q_acc_1_4; + const real_t elMat_1_5 = q_acc_1_5; + const real_t elMat_2_0 = q_acc_2_0; + const real_t elMat_2_1 = q_acc_2_1; + const real_t elMat_2_2 = q_acc_2_2; + const real_t elMat_2_3 = q_acc_2_3; + const real_t elMat_2_4 = q_acc_2_4; + const real_t elMat_2_5 = q_acc_2_5; + const real_t elMat_3_0 = q_acc_3_0; + const real_t elMat_3_1 = q_acc_3_1; + const real_t elMat_3_2 = q_acc_3_2; + const real_t elMat_3_3 = q_acc_3_3; + const real_t elMat_3_4 = q_acc_3_4; + const real_t elMat_3_5 = q_acc_3_5; + const real_t elMat_4_0 = q_acc_4_0; + const real_t elMat_4_1 = q_acc_4_1; + const real_t elMat_4_2 = q_acc_4_2; + const real_t elMat_4_3 = q_acc_4_3; + const real_t elMat_4_4 = q_acc_4_4; + const real_t elMat_4_5 = q_acc_4_5; + const real_t elMat_5_0 = q_acc_5_0; + const real_t elMat_5_1 = q_acc_5_1; + const real_t elMat_5_2 = q_acc_5_2; + const real_t elMat_5_3 = q_acc_5_3; + const real_t elMat_5_4 = q_acc_5_4; + const real_t elMat_5_5 = q_acc_5_5; + + std::vector< uint_t > _data_rowIdx( 6 ); + std::vector< uint_t > _data_colIdx( 6 ); + std::vector< real_t > _data_mat( 36 ); + + _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])); + _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])); + _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 
1)) / (2)) + 1])); + _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])); + _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])); + _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])); + _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + + /* Apply basis transformation */ + + + + _data_mat[0] = ((real_t)(elMat_0_0)); + _data_mat[1] = ((real_t)(elMat_0_1)); + _data_mat[2] = ((real_t)(elMat_0_2)); + _data_mat[3] = ((real_t)(elMat_0_3)); + _data_mat[4] = ((real_t)(elMat_0_4)); + _data_mat[5] = ((real_t)(elMat_0_5)); + _data_mat[6] = ((real_t)(elMat_1_0)); + _data_mat[7] = ((real_t)(elMat_1_1)); + _data_mat[8] = ((real_t)(elMat_1_2)); + _data_mat[9] = ((real_t)(elMat_1_3)); + _data_mat[10] = ((real_t)(elMat_1_4)); + _data_mat[11] = ((real_t)(elMat_1_5)); + _data_mat[12] = ((real_t)(elMat_2_0)); + _data_mat[13] = ((real_t)(elMat_2_1)); + _data_mat[14] = ((real_t)(elMat_2_2)); + _data_mat[15] = ((real_t)(elMat_2_3)); + _data_mat[16] = ((real_t)(elMat_2_4)); + _data_mat[17] = ((real_t)(elMat_2_5)); + _data_mat[18] = ((real_t)(elMat_3_0)); + 
_data_mat[19] = ((real_t)(elMat_3_1)); + _data_mat[20] = ((real_t)(elMat_3_2)); + _data_mat[21] = ((real_t)(elMat_3_3)); + _data_mat[22] = ((real_t)(elMat_3_4)); + _data_mat[23] = ((real_t)(elMat_3_5)); + _data_mat[24] = ((real_t)(elMat_4_0)); + _data_mat[25] = ((real_t)(elMat_4_1)); + _data_mat[26] = ((real_t)(elMat_4_2)); + _data_mat[27] = ((real_t)(elMat_4_3)); + _data_mat[28] = ((real_t)(elMat_4_4)); + _data_mat[29] = ((real_t)(elMat_4_5)); + _data_mat[30] = ((real_t)(elMat_5_0)); + _data_mat[31] = ((real_t)(elMat_5_1)); + _data_mat[32] = ((real_t)(elMat_5_2)); + _data_mat[33] = ((real_t)(elMat_5_3)); + _data_mat[34] = ((real_t)(elMat_5_4)); + _data_mat[35] = ((real_t)(elMat_5_5)); + + + mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusion_apply_P2ElementwiseSupgDiffusion_macro_2D.cpp b/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusion_apply_P2ElementwiseSupgDiffusion_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7a8ddb719e52164207837821efd8d0e735bd9f1a --- /dev/null +++ b/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusion_apply_P2ElementwiseSupgDiffusion_macro_2D.cpp @@ -0,0 +1,552 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. 
+* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseSupgDiffusion.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseSupgDiffusion::apply_P2ElementwiseSupgDiffusion_macro_2D( real_t * RESTRICT _data_diffusivity_times_deltaEdge, real_t * RESTRICT _data_diffusivity_times_deltaVertex, real_t * RESTRICT _data_dstEdge, real_t * RESTRICT _data_dstVertex, real_t * RESTRICT _data_srcEdge, real_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_wxEdge, real_t * RESTRICT _data_wxVertex, real_t * RESTRICT _data_wyEdge, real_t * RESTRICT _data_wyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const 
real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + const real_t tmp_qloop_0 = jac_affine_inv_0_0_GRAY*4.0; + const real_t tmp_qloop_1 = jac_affine_inv_1_0_GRAY*4.0; + const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1; + const real_t tmp_qloop_3 = jac_affine_inv_0_1_GRAY*4.0; + const real_t tmp_qloop_4 = jac_affine_inv_1_1_GRAY*4.0; + const real_t tmp_qloop_5 = tmp_qloop_3 + tmp_qloop_4; + const real_t tmp_qloop_6 = jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_0_1_GRAY*tmp_qloop_5 + jac_affine_inv_1_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_5; + const real_t tmp_qloop_24 = 
(jac_affine_inv_0_0_GRAY*jac_affine_inv_0_0_GRAY)*4.0 + (jac_affine_inv_0_1_GRAY*jac_affine_inv_0_1_GRAY)*4.0; + const real_t tmp_qloop_25 = (jac_affine_inv_1_0_GRAY*jac_affine_inv_1_0_GRAY)*4.0 + (jac_affine_inv_1_1_GRAY*jac_affine_inv_1_1_GRAY)*4.0; + const real_t tmp_qloop_26 = jac_affine_inv_1_0_GRAY*8.0; + const real_t tmp_qloop_27 = jac_affine_inv_1_1_GRAY*8.0; + const real_t tmp_qloop_28 = jac_affine_inv_0_0_GRAY*tmp_qloop_26 + jac_affine_inv_0_1_GRAY*tmp_qloop_27; + const real_t tmp_qloop_29 = jac_affine_inv_1_0_GRAY*tmp_qloop_0 + jac_affine_inv_1_1_GRAY*tmp_qloop_3; + const real_t tmp_qloop_30 = jac_affine_inv_1_0_GRAY*(-tmp_qloop_0 - tmp_qloop_26) + jac_affine_inv_1_1_GRAY*(-tmp_qloop_27 - tmp_qloop_3) - tmp_qloop_29; + const real_t tmp_qloop_31 = jac_affine_inv_0_0_GRAY*(jac_affine_inv_0_0_GRAY*-8.0 - tmp_qloop_1) + jac_affine_inv_0_1_GRAY*(jac_affine_inv_0_1_GRAY*-8.0 - tmp_qloop_4) - tmp_qloop_29; + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / 
(micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / 
(micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t src_dof_1 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_3 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 
2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wx_dof_1 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_3 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wy_dof_1 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_3 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t 
q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_7 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_8 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_9 = tmp_qloop_7 + tmp_qloop_8 - 3.0; + const real_t tmp_qloop_10 = tmp_qloop_7*_data_q_p_1[q]; + const real_t tmp_qloop_11 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_12 = tmp_qloop_11*2.0; + const real_t tmp_qloop_13 = tmp_qloop_12 - _data_q_p_0[q]; + const real_t tmp_qloop_14 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_15 = tmp_qloop_14*2.0; + const real_t tmp_qloop_16 = tmp_qloop_15 - _data_q_p_1[q]; + const real_t tmp_qloop_17 = -tmp_qloop_10 + tmp_qloop_14*-4.0 + tmp_qloop_8; + const real_t tmp_qloop_18 = -tmp_qloop_10 + tmp_qloop_11*-4.0 + tmp_qloop_7; + const real_t tmp_qloop_19 = tmp_qloop_10 + tmp_qloop_12 + tmp_qloop_15 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_20 = tmp_qloop_10*wx_dof_3 + tmp_qloop_13*wx_dof_1 + tmp_qloop_16*wx_dof_2 + tmp_qloop_17*wx_dof_4 + tmp_qloop_18*wx_dof_5 + tmp_qloop_19*wx_dof_0; + const real_t 
tmp_qloop_21 = tmp_qloop_10*wy_dof_3 + tmp_qloop_13*wy_dof_1 + tmp_qloop_16*wy_dof_2 + tmp_qloop_17*wy_dof_4 + tmp_qloop_18*wy_dof_5 + tmp_qloop_19*wy_dof_0; + const real_t tmp_qloop_22 = abs_det_jac_affine_GRAY*(diffusivity_times_delta_dof_0*tmp_qloop_19 + diffusivity_times_delta_dof_1*tmp_qloop_13 + diffusivity_times_delta_dof_2*tmp_qloop_16 + diffusivity_times_delta_dof_3*tmp_qloop_10 + diffusivity_times_delta_dof_4*tmp_qloop_17 + diffusivity_times_delta_dof_5*tmp_qloop_18)*_data_q_w[q]; + const real_t tmp_qloop_23 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_0_0_GRAY*tmp_qloop_9 + jac_affine_inv_1_0_GRAY*tmp_qloop_9) + tmp_qloop_21*(jac_affine_inv_0_1_GRAY*tmp_qloop_9 + jac_affine_inv_1_1_GRAY*tmp_qloop_9)); + const real_t tmp_qloop_32 = tmp_qloop_7 - 1.0; + const real_t tmp_qloop_33 = tmp_qloop_22*(jac_affine_inv_0_0_GRAY*tmp_qloop_20*tmp_qloop_32 + jac_affine_inv_0_1_GRAY*tmp_qloop_21*tmp_qloop_32); + const real_t tmp_qloop_34 = tmp_qloop_8 - 1.0; + const real_t tmp_qloop_35 = tmp_qloop_22*(jac_affine_inv_1_0_GRAY*tmp_qloop_20*tmp_qloop_34 + jac_affine_inv_1_1_GRAY*tmp_qloop_21*tmp_qloop_34); + const real_t tmp_qloop_36 = jac_affine_inv_1_0_GRAY*tmp_qloop_7; + const real_t tmp_qloop_37 = jac_affine_inv_0_0_GRAY*tmp_qloop_8; + const real_t tmp_qloop_38 = jac_affine_inv_1_1_GRAY*tmp_qloop_7; + const real_t tmp_qloop_39 = jac_affine_inv_0_1_GRAY*tmp_qloop_8; + const real_t tmp_qloop_40 = tmp_qloop_22*(tmp_qloop_20*(tmp_qloop_36 + tmp_qloop_37) + tmp_qloop_21*(tmp_qloop_38 + tmp_qloop_39)); + const real_t tmp_qloop_41 = -tmp_qloop_7 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_42 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_1_0_GRAY*tmp_qloop_41 - tmp_qloop_37) + tmp_qloop_21*(jac_affine_inv_1_1_GRAY*tmp_qloop_41 - tmp_qloop_39)); + const real_t tmp_qloop_43 = -tmp_qloop_8 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_44 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_0_0_GRAY*tmp_qloop_43 - tmp_qloop_36) + 
tmp_qloop_21*(jac_affine_inv_0_1_GRAY*tmp_qloop_43 - tmp_qloop_38)); + const real_t q_tmp_0_0 = tmp_qloop_23*tmp_qloop_6; + const real_t q_tmp_0_1 = tmp_qloop_23*tmp_qloop_24; + const real_t q_tmp_0_2 = tmp_qloop_23*tmp_qloop_25; + const real_t q_tmp_0_3 = tmp_qloop_23*tmp_qloop_28; + const real_t q_tmp_0_4 = tmp_qloop_23*tmp_qloop_30; + const real_t q_tmp_0_5 = tmp_qloop_23*tmp_qloop_31; + const real_t q_tmp_1_0 = tmp_qloop_33*tmp_qloop_6; + const real_t q_tmp_1_1 = tmp_qloop_24*tmp_qloop_33; + const real_t q_tmp_1_2 = tmp_qloop_25*tmp_qloop_33; + const real_t q_tmp_1_3 = tmp_qloop_28*tmp_qloop_33; + const real_t q_tmp_1_4 = tmp_qloop_30*tmp_qloop_33; + const real_t q_tmp_1_5 = tmp_qloop_31*tmp_qloop_33; + const real_t q_tmp_2_0 = tmp_qloop_35*tmp_qloop_6; + const real_t q_tmp_2_1 = tmp_qloop_24*tmp_qloop_35; + const real_t q_tmp_2_2 = tmp_qloop_25*tmp_qloop_35; + const real_t q_tmp_2_3 = tmp_qloop_28*tmp_qloop_35; + const real_t q_tmp_2_4 = tmp_qloop_30*tmp_qloop_35; + const real_t q_tmp_2_5 = tmp_qloop_31*tmp_qloop_35; + const real_t q_tmp_3_0 = tmp_qloop_40*tmp_qloop_6; + const real_t q_tmp_3_1 = tmp_qloop_24*tmp_qloop_40; + const real_t q_tmp_3_2 = tmp_qloop_25*tmp_qloop_40; + const real_t q_tmp_3_3 = tmp_qloop_28*tmp_qloop_40; + const real_t q_tmp_3_4 = tmp_qloop_30*tmp_qloop_40; + const real_t q_tmp_3_5 = tmp_qloop_31*tmp_qloop_40; + const real_t q_tmp_4_0 = tmp_qloop_42*tmp_qloop_6; + const real_t q_tmp_4_1 = tmp_qloop_24*tmp_qloop_42; + const real_t q_tmp_4_2 = tmp_qloop_25*tmp_qloop_42; + const real_t q_tmp_4_3 = tmp_qloop_28*tmp_qloop_42; + const real_t q_tmp_4_4 = tmp_qloop_30*tmp_qloop_42; + const real_t q_tmp_4_5 = tmp_qloop_31*tmp_qloop_42; + const real_t q_tmp_5_0 = tmp_qloop_44*tmp_qloop_6; + const real_t q_tmp_5_1 = tmp_qloop_24*tmp_qloop_44; + const real_t q_tmp_5_2 = tmp_qloop_25*tmp_qloop_44; + const real_t q_tmp_5_3 = tmp_qloop_28*tmp_qloop_44; + const real_t q_tmp_5_4 = tmp_qloop_30*tmp_qloop_44; + const real_t q_tmp_5_5 = 
tmp_qloop_31*tmp_qloop_44; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5; 
+ const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5; + const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_1 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = 
macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + const real_t 
tmp_moved_constant_0 = jac_affine_inv_0_0_BLUE*4.0; + const real_t tmp_moved_constant_1 = jac_affine_inv_1_0_BLUE*4.0; + const real_t tmp_moved_constant_2 = tmp_moved_constant_0 + tmp_moved_constant_1; + const real_t tmp_moved_constant_3 = jac_affine_inv_0_1_BLUE*4.0; + const real_t tmp_moved_constant_4 = jac_affine_inv_1_1_BLUE*4.0; + const real_t tmp_moved_constant_5 = tmp_moved_constant_3 + tmp_moved_constant_4; + const real_t tmp_moved_constant_6 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_0_1_BLUE*tmp_moved_constant_5 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_5; + const real_t tmp_moved_constant_7 = (jac_affine_inv_0_0_BLUE*jac_affine_inv_0_0_BLUE)*4.0 + (jac_affine_inv_0_1_BLUE*jac_affine_inv_0_1_BLUE)*4.0; + const real_t tmp_moved_constant_8 = (jac_affine_inv_1_0_BLUE*jac_affine_inv_1_0_BLUE)*4.0 + (jac_affine_inv_1_1_BLUE*jac_affine_inv_1_1_BLUE)*4.0; + const real_t tmp_moved_constant_9 = jac_affine_inv_1_0_BLUE*8.0; + const real_t tmp_moved_constant_10 = jac_affine_inv_1_1_BLUE*8.0; + const real_t tmp_moved_constant_11 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_9 + jac_affine_inv_0_1_BLUE*tmp_moved_constant_10; + const real_t tmp_moved_constant_12 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_0 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_3; + const real_t tmp_moved_constant_13 = jac_affine_inv_1_0_BLUE*(-tmp_moved_constant_0 - tmp_moved_constant_9) + jac_affine_inv_1_1_BLUE*(-tmp_moved_constant_10 - tmp_moved_constant_3) - tmp_moved_constant_12; + const real_t tmp_moved_constant_14 = jac_affine_inv_0_0_BLUE*(jac_affine_inv_0_0_BLUE*-8.0 - tmp_moved_constant_1) + jac_affine_inv_0_1_BLUE*(jac_affine_inv_0_1_BLUE*-8.0 - tmp_moved_constant_4) - tmp_moved_constant_12; + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = 
ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / 
(micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t src_dof_0 = _data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t src_dof_1 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_2 = _data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t src_dof_3 = _data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t src_dof_4 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t src_dof_5 = _data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wx_dof_1 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t wx_dof_3 = _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t 
wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wy_dof_1 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t wy_dof_3 = _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_7 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_8 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_9 = 
tmp_qloop_7 + tmp_qloop_8 - 3.0; + const real_t tmp_qloop_10 = tmp_qloop_7*_data_q_p_1[q]; + const real_t tmp_qloop_11 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_12 = tmp_qloop_11*2.0; + const real_t tmp_qloop_13 = tmp_qloop_12 - _data_q_p_0[q]; + const real_t tmp_qloop_14 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_15 = tmp_qloop_14*2.0; + const real_t tmp_qloop_16 = tmp_qloop_15 - _data_q_p_1[q]; + const real_t tmp_qloop_17 = -tmp_qloop_10 + tmp_qloop_14*-4.0 + tmp_qloop_8; + const real_t tmp_qloop_18 = -tmp_qloop_10 + tmp_qloop_11*-4.0 + tmp_qloop_7; + const real_t tmp_qloop_19 = tmp_qloop_10 + tmp_qloop_12 + tmp_qloop_15 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_20 = tmp_qloop_10*wx_dof_3 + tmp_qloop_13*wx_dof_1 + tmp_qloop_16*wx_dof_2 + tmp_qloop_17*wx_dof_4 + tmp_qloop_18*wx_dof_5 + tmp_qloop_19*wx_dof_0; + const real_t tmp_qloop_21 = tmp_qloop_10*wy_dof_3 + tmp_qloop_13*wy_dof_1 + tmp_qloop_16*wy_dof_2 + tmp_qloop_17*wy_dof_4 + tmp_qloop_18*wy_dof_5 + tmp_qloop_19*wy_dof_0; + const real_t tmp_qloop_22 = abs_det_jac_affine_BLUE*(diffusivity_times_delta_dof_0*tmp_qloop_19 + diffusivity_times_delta_dof_1*tmp_qloop_13 + diffusivity_times_delta_dof_2*tmp_qloop_16 + diffusivity_times_delta_dof_3*tmp_qloop_10 + diffusivity_times_delta_dof_4*tmp_qloop_17 + diffusivity_times_delta_dof_5*tmp_qloop_18)*_data_q_w[q]; + const real_t tmp_qloop_23 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_0_0_BLUE*tmp_qloop_9 + jac_affine_inv_1_0_BLUE*tmp_qloop_9) + tmp_qloop_21*(jac_affine_inv_0_1_BLUE*tmp_qloop_9 + jac_affine_inv_1_1_BLUE*tmp_qloop_9)); + const real_t tmp_qloop_32 = tmp_qloop_7 - 1.0; + const real_t tmp_qloop_33 = tmp_qloop_22*(jac_affine_inv_0_0_BLUE*tmp_qloop_20*tmp_qloop_32 + jac_affine_inv_0_1_BLUE*tmp_qloop_21*tmp_qloop_32); + const real_t tmp_qloop_34 = tmp_qloop_8 - 1.0; + const real_t tmp_qloop_35 = tmp_qloop_22*(jac_affine_inv_1_0_BLUE*tmp_qloop_20*tmp_qloop_34 + 
jac_affine_inv_1_1_BLUE*tmp_qloop_21*tmp_qloop_34); + const real_t tmp_qloop_36 = jac_affine_inv_1_0_BLUE*tmp_qloop_7; + const real_t tmp_qloop_37 = jac_affine_inv_0_0_BLUE*tmp_qloop_8; + const real_t tmp_qloop_38 = jac_affine_inv_1_1_BLUE*tmp_qloop_7; + const real_t tmp_qloop_39 = jac_affine_inv_0_1_BLUE*tmp_qloop_8; + const real_t tmp_qloop_40 = tmp_qloop_22*(tmp_qloop_20*(tmp_qloop_36 + tmp_qloop_37) + tmp_qloop_21*(tmp_qloop_38 + tmp_qloop_39)); + const real_t tmp_qloop_41 = -tmp_qloop_7 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_42 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_1_0_BLUE*tmp_qloop_41 - tmp_qloop_37) + tmp_qloop_21*(jac_affine_inv_1_1_BLUE*tmp_qloop_41 - tmp_qloop_39)); + const real_t tmp_qloop_43 = -tmp_qloop_8 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_44 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_0_0_BLUE*tmp_qloop_43 - tmp_qloop_36) + tmp_qloop_21*(jac_affine_inv_0_1_BLUE*tmp_qloop_43 - tmp_qloop_38)); + const real_t q_tmp_0_0 = tmp_moved_constant_6*tmp_qloop_23; + const real_t q_tmp_0_1 = tmp_moved_constant_7*tmp_qloop_23; + const real_t q_tmp_0_2 = tmp_moved_constant_8*tmp_qloop_23; + const real_t q_tmp_0_3 = tmp_moved_constant_11*tmp_qloop_23; + const real_t q_tmp_0_4 = tmp_moved_constant_13*tmp_qloop_23; + const real_t q_tmp_0_5 = tmp_moved_constant_14*tmp_qloop_23; + const real_t q_tmp_1_0 = tmp_moved_constant_6*tmp_qloop_33; + const real_t q_tmp_1_1 = tmp_moved_constant_7*tmp_qloop_33; + const real_t q_tmp_1_2 = tmp_moved_constant_8*tmp_qloop_33; + const real_t q_tmp_1_3 = tmp_moved_constant_11*tmp_qloop_33; + const real_t q_tmp_1_4 = tmp_moved_constant_13*tmp_qloop_33; + const real_t q_tmp_1_5 = tmp_moved_constant_14*tmp_qloop_33; + const real_t q_tmp_2_0 = tmp_moved_constant_6*tmp_qloop_35; + const real_t q_tmp_2_1 = tmp_moved_constant_7*tmp_qloop_35; + const real_t q_tmp_2_2 = tmp_moved_constant_8*tmp_qloop_35; + const real_t q_tmp_2_3 = tmp_moved_constant_11*tmp_qloop_35; + const real_t q_tmp_2_4 = 
tmp_moved_constant_13*tmp_qloop_35; + const real_t q_tmp_2_5 = tmp_moved_constant_14*tmp_qloop_35; + const real_t q_tmp_3_0 = tmp_moved_constant_6*tmp_qloop_40; + const real_t q_tmp_3_1 = tmp_moved_constant_7*tmp_qloop_40; + const real_t q_tmp_3_2 = tmp_moved_constant_8*tmp_qloop_40; + const real_t q_tmp_3_3 = tmp_moved_constant_11*tmp_qloop_40; + const real_t q_tmp_3_4 = tmp_moved_constant_13*tmp_qloop_40; + const real_t q_tmp_3_5 = tmp_moved_constant_14*tmp_qloop_40; + const real_t q_tmp_4_0 = tmp_moved_constant_6*tmp_qloop_42; + const real_t q_tmp_4_1 = tmp_moved_constant_7*tmp_qloop_42; + const real_t q_tmp_4_2 = tmp_moved_constant_8*tmp_qloop_42; + const real_t q_tmp_4_3 = tmp_moved_constant_11*tmp_qloop_42; + const real_t q_tmp_4_4 = tmp_moved_constant_13*tmp_qloop_42; + const real_t q_tmp_4_5 = tmp_moved_constant_14*tmp_qloop_42; + const real_t q_tmp_5_0 = tmp_moved_constant_6*tmp_qloop_44; + const real_t q_tmp_5_1 = tmp_moved_constant_7*tmp_qloop_44; + const real_t q_tmp_5_2 = tmp_moved_constant_8*tmp_qloop_44; + const real_t q_tmp_5_3 = tmp_moved_constant_11*tmp_qloop_44; + const real_t q_tmp_5_4 = tmp_moved_constant_13*tmp_qloop_44; + const real_t q_tmp_5_5 = tmp_moved_constant_14*tmp_qloop_44; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + 
q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatVec_0 = q_acc_0_0*src_dof_0 + q_acc_0_1*src_dof_1 + q_acc_0_2*src_dof_2 + q_acc_0_3*src_dof_3 + q_acc_0_4*src_dof_4 + q_acc_0_5*src_dof_5; + const real_t elMatVec_1 = q_acc_1_0*src_dof_0 + q_acc_1_1*src_dof_1 + q_acc_1_2*src_dof_2 + q_acc_1_3*src_dof_3 + q_acc_1_4*src_dof_4 + q_acc_1_5*src_dof_5; + const real_t elMatVec_2 = q_acc_2_0*src_dof_0 + q_acc_2_1*src_dof_1 + q_acc_2_2*src_dof_2 + q_acc_2_3*src_dof_3 + q_acc_2_4*src_dof_4 + q_acc_2_5*src_dof_5; + const real_t elMatVec_3 = q_acc_3_0*src_dof_0 + q_acc_3_1*src_dof_1 + q_acc_3_2*src_dof_2 + q_acc_3_3*src_dof_3 + q_acc_3_4*src_dof_4 + q_acc_3_5*src_dof_5; + const real_t elMatVec_4 = q_acc_4_0*src_dof_0 + q_acc_4_1*src_dof_1 + q_acc_4_2*src_dof_2 + q_acc_4_3*src_dof_3 + q_acc_4_4*src_dof_4 + q_acc_4_5*src_dof_5; + const real_t elMatVec_5 = q_acc_5_0*src_dof_0 + q_acc_5_1*src_dof_1 + q_acc_5_2*src_dof_2 + q_acc_5_3*src_dof_3 + q_acc_5_4*src_dof_4 + q_acc_5_5*src_dof_5; + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatVec_0 + _data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_1 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstVertex[ctr_0 + 
(ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatVec_2 + _data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatVec_3 + _data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatVec_4 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatVec_5 + _data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusion_macro_2D.cpp b/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusion_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..930e1257b8601da2a6c7186d9666453ef2c6f1dd --- /dev/null +++ b/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusion_computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusion_macro_2D.cpp @@ -0,0 +1,336 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). 
+* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. +*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseSupgDiffusion.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseSupgDiffusion::computeInverseDiagonalOperatorValues_P2ElementwiseSupgDiffusion_macro_2D( real_t * RESTRICT _data_diffusivity_times_deltaEdge, real_t * RESTRICT _data_diffusivity_times_deltaVertex, real_t * RESTRICT _data_invDiag_Edge, real_t * RESTRICT _data_invDiag_Vertex, real_t * RESTRICT _data_wxEdge, real_t * RESTRICT _data_wxVertex, real_t * RESTRICT _data_wyEdge, real_t * RESTRICT _data_wyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = 
{0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + const real_t tmp_qloop_15 = jac_affine_inv_0_0_GRAY*4.0; + const real_t tmp_qloop_16 = 
jac_affine_inv_1_0_GRAY*4.0; + const real_t tmp_qloop_17 = tmp_qloop_15 + tmp_qloop_16; + const real_t tmp_qloop_18 = jac_affine_inv_0_1_GRAY*4.0; + const real_t tmp_qloop_19 = jac_affine_inv_1_1_GRAY*4.0; + const real_t tmp_qloop_20 = tmp_qloop_18 + tmp_qloop_19; + const real_t tmp_qloop_28 = jac_affine_inv_1_0_GRAY*8.0; + const real_t tmp_qloop_29 = jac_affine_inv_1_1_GRAY*8.0; + const real_t tmp_qloop_31 = jac_affine_inv_1_0_GRAY*tmp_qloop_15 + jac_affine_inv_1_1_GRAY*tmp_qloop_18; + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + 
macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 
+ 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wx_dof_1 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_3 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wy_dof_1 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 
1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_3 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_1 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0; + const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q]; + const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_8 = tmp_qloop_7*2.0; + const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0; + const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0; + const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_13 = tmp_qloop_10*wx_dof_4 + tmp_qloop_11*wx_dof_5 + tmp_qloop_12*wx_dof_0 + tmp_qloop_3*wx_dof_3 + tmp_qloop_6*wx_dof_1 + tmp_qloop_9*wx_dof_2; + const real_t tmp_qloop_14 = tmp_qloop_10*wy_dof_4 + tmp_qloop_11*wy_dof_5 + tmp_qloop_12*wy_dof_0 + tmp_qloop_3*wy_dof_3 + tmp_qloop_6*wy_dof_1 + tmp_qloop_9*wy_dof_2; + const real_t tmp_qloop_21 = 
abs_det_jac_affine_GRAY*(diffusivity_times_delta_dof_0*tmp_qloop_12 + diffusivity_times_delta_dof_1*tmp_qloop_6 + diffusivity_times_delta_dof_2*tmp_qloop_9 + diffusivity_times_delta_dof_3*tmp_qloop_3 + diffusivity_times_delta_dof_4*tmp_qloop_10 + diffusivity_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q]; + const real_t tmp_qloop_22 = tmp_qloop_0 - 1.0; + const real_t tmp_qloop_23 = tmp_qloop_1 - 1.0; + const real_t tmp_qloop_24 = jac_affine_inv_1_0_GRAY*tmp_qloop_0; + const real_t tmp_qloop_25 = jac_affine_inv_0_0_GRAY*tmp_qloop_1; + const real_t tmp_qloop_26 = jac_affine_inv_1_1_GRAY*tmp_qloop_0; + const real_t tmp_qloop_27 = jac_affine_inv_0_1_GRAY*tmp_qloop_1; + const real_t tmp_qloop_30 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_32 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0; + const real_t q_tmp_0_0 = tmp_qloop_21*(tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_0_GRAY*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_2))*(jac_affine_inv_0_0_GRAY*tmp_qloop_17 + jac_affine_inv_0_1_GRAY*tmp_qloop_20 + jac_affine_inv_1_0_GRAY*tmp_qloop_17 + jac_affine_inv_1_1_GRAY*tmp_qloop_20); + const real_t q_tmp_1_1 = tmp_qloop_21*((jac_affine_inv_0_0_GRAY*jac_affine_inv_0_0_GRAY)*4.0 + (jac_affine_inv_0_1_GRAY*jac_affine_inv_0_1_GRAY)*4.0)*(jac_affine_inv_0_0_GRAY*tmp_qloop_13*tmp_qloop_22 + jac_affine_inv_0_1_GRAY*tmp_qloop_14*tmp_qloop_22); + const real_t q_tmp_2_2 = tmp_qloop_21*((jac_affine_inv_1_0_GRAY*jac_affine_inv_1_0_GRAY)*4.0 + (jac_affine_inv_1_1_GRAY*jac_affine_inv_1_1_GRAY)*4.0)*(jac_affine_inv_1_0_GRAY*tmp_qloop_13*tmp_qloop_23 + jac_affine_inv_1_1_GRAY*tmp_qloop_14*tmp_qloop_23); + const real_t q_tmp_3_3 = tmp_qloop_21*(jac_affine_inv_0_0_GRAY*tmp_qloop_28 + jac_affine_inv_0_1_GRAY*tmp_qloop_29)*(tmp_qloop_13*(tmp_qloop_24 + tmp_qloop_25) + tmp_qloop_14*(tmp_qloop_26 + tmp_qloop_27)); + const real_t q_tmp_4_4 = 
tmp_qloop_21*(tmp_qloop_13*(jac_affine_inv_1_0_GRAY*tmp_qloop_30 - tmp_qloop_25) + tmp_qloop_14*(jac_affine_inv_1_1_GRAY*tmp_qloop_30 - tmp_qloop_27))*(jac_affine_inv_1_0_GRAY*(-tmp_qloop_15 - tmp_qloop_28) + jac_affine_inv_1_1_GRAY*(-tmp_qloop_18 - tmp_qloop_29) - tmp_qloop_31); + const real_t q_tmp_5_5 = tmp_qloop_21*(tmp_qloop_13*(jac_affine_inv_0_0_GRAY*tmp_qloop_32 - tmp_qloop_24) + tmp_qloop_14*(jac_affine_inv_0_1_GRAY*tmp_qloop_32 - tmp_qloop_26))*(jac_affine_inv_0_0_GRAY*(jac_affine_inv_0_0_GRAY*-8.0 - tmp_qloop_16) + jac_affine_inv_0_1_GRAY*(jac_affine_inv_0_1_GRAY*-8.0 - tmp_qloop_19) - tmp_qloop_31); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatDiag_0 = q_acc_0_0; + const real_t elMatDiag_1 = q_acc_1_1; + const real_t elMatDiag_2 = q_acc_2_2; + const real_t elMatDiag_3 = q_acc_3_3; + const real_t elMatDiag_4 = q_acc_4_4; + const real_t elMatDiag_5 = q_acc_5_5; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + 
ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = 
-p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + const real_t tmp_moved_constant_0 = jac_affine_inv_0_0_BLUE*4.0; + const real_t tmp_moved_constant_1 = jac_affine_inv_1_0_BLUE*4.0; + const real_t tmp_moved_constant_2 = tmp_moved_constant_0 + tmp_moved_constant_1; + const real_t tmp_moved_constant_3 = jac_affine_inv_0_1_BLUE*4.0; + const real_t tmp_moved_constant_4 = jac_affine_inv_1_1_BLUE*4.0; + const real_t tmp_moved_constant_5 = tmp_moved_constant_3 + tmp_moved_constant_4; + const real_t tmp_moved_constant_6 = jac_affine_inv_1_0_BLUE*8.0; + const real_t tmp_moved_constant_7 = jac_affine_inv_1_1_BLUE*8.0; + const real_t tmp_moved_constant_8 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_0 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_3; + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + 
_data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / 
(micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - 
((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wx_dof_1 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t wx_dof_3 = _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wy_dof_1 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t wy_dof_3 = _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_0 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_1 
= 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1 - 3.0; + const real_t tmp_qloop_3 = tmp_qloop_0*_data_q_p_1[q]; + const real_t tmp_qloop_4 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_5 = tmp_qloop_4*2.0; + const real_t tmp_qloop_6 = tmp_qloop_5 - _data_q_p_0[q]; + const real_t tmp_qloop_7 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_8 = tmp_qloop_7*2.0; + const real_t tmp_qloop_9 = tmp_qloop_8 - _data_q_p_1[q]; + const real_t tmp_qloop_10 = tmp_qloop_1 - tmp_qloop_3 + tmp_qloop_7*-4.0; + const real_t tmp_qloop_11 = tmp_qloop_0 - tmp_qloop_3 + tmp_qloop_4*-4.0; + const real_t tmp_qloop_12 = tmp_qloop_3 + tmp_qloop_5 + tmp_qloop_8 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_13 = tmp_qloop_10*wx_dof_4 + tmp_qloop_11*wx_dof_5 + tmp_qloop_12*wx_dof_0 + tmp_qloop_3*wx_dof_3 + tmp_qloop_6*wx_dof_1 + tmp_qloop_9*wx_dof_2; + const real_t tmp_qloop_14 = tmp_qloop_10*wy_dof_4 + tmp_qloop_11*wy_dof_5 + tmp_qloop_12*wy_dof_0 + tmp_qloop_3*wy_dof_3 + tmp_qloop_6*wy_dof_1 + tmp_qloop_9*wy_dof_2; + const real_t tmp_qloop_21 = abs_det_jac_affine_BLUE*(diffusivity_times_delta_dof_0*tmp_qloop_12 + diffusivity_times_delta_dof_1*tmp_qloop_6 + diffusivity_times_delta_dof_2*tmp_qloop_9 + diffusivity_times_delta_dof_3*tmp_qloop_3 + diffusivity_times_delta_dof_4*tmp_qloop_10 + diffusivity_times_delta_dof_5*tmp_qloop_11)*_data_q_w[q]; + const real_t tmp_qloop_22 = tmp_qloop_0 - 1.0; + const real_t tmp_qloop_23 = tmp_qloop_1 - 1.0; + const real_t tmp_qloop_24 = jac_affine_inv_1_0_BLUE*tmp_qloop_0; + const real_t tmp_qloop_25 = jac_affine_inv_0_0_BLUE*tmp_qloop_1; + const real_t tmp_qloop_26 = jac_affine_inv_1_1_BLUE*tmp_qloop_0; + const real_t tmp_qloop_27 = jac_affine_inv_0_1_BLUE*tmp_qloop_1; + const real_t tmp_qloop_30 = -tmp_qloop_0 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_32 = -tmp_qloop_1 - 8.0*_data_q_p_0[q] + 4.0; + const real_t q_tmp_0_0 = 
tmp_qloop_21*(tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_2 + jac_affine_inv_1_0_BLUE*tmp_qloop_2) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_2 + jac_affine_inv_1_1_BLUE*tmp_qloop_2))*(jac_affine_inv_0_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_0_1_BLUE*tmp_moved_constant_5 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_5); + const real_t q_tmp_1_1 = tmp_qloop_21*((jac_affine_inv_0_0_BLUE*jac_affine_inv_0_0_BLUE)*4.0 + (jac_affine_inv_0_1_BLUE*jac_affine_inv_0_1_BLUE)*4.0)*(jac_affine_inv_0_0_BLUE*tmp_qloop_13*tmp_qloop_22 + jac_affine_inv_0_1_BLUE*tmp_qloop_14*tmp_qloop_22); + const real_t q_tmp_2_2 = tmp_qloop_21*((jac_affine_inv_1_0_BLUE*jac_affine_inv_1_0_BLUE)*4.0 + (jac_affine_inv_1_1_BLUE*jac_affine_inv_1_1_BLUE)*4.0)*(jac_affine_inv_1_0_BLUE*tmp_qloop_13*tmp_qloop_23 + jac_affine_inv_1_1_BLUE*tmp_qloop_14*tmp_qloop_23); + const real_t q_tmp_3_3 = tmp_qloop_21*(jac_affine_inv_0_0_BLUE*tmp_moved_constant_6 + jac_affine_inv_0_1_BLUE*tmp_moved_constant_7)*(tmp_qloop_13*(tmp_qloop_24 + tmp_qloop_25) + tmp_qloop_14*(tmp_qloop_26 + tmp_qloop_27)); + const real_t q_tmp_4_4 = tmp_qloop_21*(tmp_qloop_13*(jac_affine_inv_1_0_BLUE*tmp_qloop_30 - tmp_qloop_25) + tmp_qloop_14*(jac_affine_inv_1_1_BLUE*tmp_qloop_30 - tmp_qloop_27))*(jac_affine_inv_1_0_BLUE*(-tmp_moved_constant_0 - tmp_moved_constant_6) + jac_affine_inv_1_1_BLUE*(-tmp_moved_constant_3 - tmp_moved_constant_7) - tmp_moved_constant_8); + const real_t q_tmp_5_5 = tmp_qloop_21*(tmp_qloop_13*(jac_affine_inv_0_0_BLUE*tmp_qloop_32 - tmp_qloop_24) + tmp_qloop_14*(jac_affine_inv_0_1_BLUE*tmp_qloop_32 - tmp_qloop_26))*(jac_affine_inv_0_0_BLUE*(jac_affine_inv_0_0_BLUE*-8.0 - tmp_moved_constant_1) + jac_affine_inv_0_1_BLUE*(jac_affine_inv_0_1_BLUE*-8.0 - tmp_moved_constant_4) - tmp_moved_constant_8); + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_4_4 = 
q_acc_4_4 + q_tmp_4_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMatDiag_0 = q_acc_0_0; + const real_t elMatDiag_1 = q_acc_1_1; + const real_t elMatDiag_2 = q_acc_2_2; + const real_t elMatDiag_3 = q_acc_3_3; + const real_t elMatDiag_4 = q_acc_4_4; + const real_t elMatDiag_5 = q_acc_5_5; + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1] = elMatDiag_0 + _data_invDiag_Vertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_1 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1] = elMatDiag_2 + _data_invDiag_Vertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))] = elMatDiag_3 + _data_invDiag_Edge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1] = elMatDiag_4 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))] = elMatDiag_5 + _data_invDiag_Edge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + } + } + } +} +} 
// namespace operatorgeneration + +} // namespace hyteg diff --git a/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusion_toMatrix_P2ElementwiseSupgDiffusion_macro_2D.cpp b/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusion_toMatrix_P2ElementwiseSupgDiffusion_macro_2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..abc9a3672135752f8af08677996ff7cd1603825e --- /dev/null +++ b/operators/supg_diffusion/noarch/P2ElementwiseSupgDiffusion_toMatrix_P2ElementwiseSupgDiffusion_macro_2D.cpp @@ -0,0 +1,710 @@ +/* +* Copyright (c) 2017-2024 Nils Kohl, Daniel Bauer, Fabian Böhm. +* +* This file is part of HyTeG +* (see https://i10git.cs.fau.de/hyteg/hyteg). +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* +* The entire file was generated with the HyTeG Operator Generator. +* +* Avoid modifying this file. If buggy, consider fixing the generator itself. 
+*/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#include "../P2ElementwiseSupgDiffusion.hpp" + +#define FUNC_PREFIX + +namespace hyteg { + +namespace operatorgeneration { + +void P2ElementwiseSupgDiffusion::toMatrix_P2ElementwiseSupgDiffusion_macro_2D( real_t * RESTRICT _data_diffusivity_times_deltaEdge, real_t * RESTRICT _data_diffusivity_times_deltaVertex, idx_t * RESTRICT _data_dstEdge, idx_t * RESTRICT _data_dstVertex, idx_t * RESTRICT _data_srcEdge, idx_t * RESTRICT _data_srcVertex, real_t * RESTRICT _data_wxEdge, real_t * RESTRICT _data_wxVertex, real_t * RESTRICT _data_wyEdge, real_t * RESTRICT _data_wyVertex, real_t macro_vertex_coord_id_0comp0, real_t macro_vertex_coord_id_0comp1, real_t macro_vertex_coord_id_1comp0, real_t macro_vertex_coord_id_1comp1, real_t macro_vertex_coord_id_2comp0, real_t macro_vertex_coord_id_2comp1, std::shared_ptr< SparseMatrixProxy > mat, int64_t micro_edges_per_macro_edge, real_t micro_edges_per_macro_edge_float ) const +{ + { + const real_t _data_q_w [] = {-0.28125, 0.26041666666666669, 0.26041666666666669, 0.26041666666666669}; + + const real_t _data_q_p_0 [] = {0.33333333333333331, 0.20000000000000001, 0.59999999999999998, 0.20000000000000001}; + + const real_t _data_q_p_1 [] = {0.33333333333333331, 0.59999999999999998, 0.20000000000000001, 0.20000000000000001}; + + const real_t tmp_coords_jac_0_GRAY = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t p_affine_const_0_0_GRAY = macro_vertex_coord_id_0comp0; + const real_t p_affine_const_0_1_GRAY = macro_vertex_coord_id_0comp1; + const real_t p_affine_const_1_0_GRAY = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t p_affine_const_1_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t p_affine_const_2_0_GRAY = macro_vertex_coord_id_0comp0 + 
tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t p_affine_const_2_1_GRAY = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_GRAY*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t jac_affine_0_0_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_1_0_GRAY; + const real_t jac_affine_0_1_GRAY = -p_affine_const_0_0_GRAY + p_affine_const_2_0_GRAY; + const real_t jac_affine_1_0_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_1_1_GRAY; + const real_t jac_affine_1_1_GRAY = -p_affine_const_0_1_GRAY + p_affine_const_2_1_GRAY; + const real_t tmp_coords_jac_1_GRAY = jac_affine_0_0_GRAY*jac_affine_1_1_GRAY - jac_affine_0_1_GRAY*jac_affine_1_0_GRAY; + const real_t tmp_coords_jac_2_GRAY = 1.0 / (tmp_coords_jac_1_GRAY); + const real_t jac_affine_inv_0_0_GRAY = jac_affine_1_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_0_1_GRAY = -jac_affine_0_1_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_0_GRAY = -jac_affine_1_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t jac_affine_inv_1_1_GRAY = jac_affine_0_0_GRAY*tmp_coords_jac_2_GRAY; + const real_t abs_det_jac_affine_GRAY = abs(tmp_coords_jac_1_GRAY); + const real_t tmp_qloop_0 = jac_affine_inv_0_0_GRAY*4.0; + const real_t tmp_qloop_1 = jac_affine_inv_1_0_GRAY*4.0; + const real_t tmp_qloop_2 = tmp_qloop_0 + tmp_qloop_1; + const real_t tmp_qloop_3 = jac_affine_inv_0_1_GRAY*4.0; + const real_t tmp_qloop_4 = jac_affine_inv_1_1_GRAY*4.0; + const real_t tmp_qloop_5 = tmp_qloop_3 + tmp_qloop_4; + const real_t tmp_qloop_6 = jac_affine_inv_0_0_GRAY*tmp_qloop_2 + jac_affine_inv_0_1_GRAY*tmp_qloop_5 + jac_affine_inv_1_0_GRAY*tmp_qloop_2 + jac_affine_inv_1_1_GRAY*tmp_qloop_5; + const real_t tmp_qloop_24 = (jac_affine_inv_0_0_GRAY*jac_affine_inv_0_0_GRAY)*4.0 + (jac_affine_inv_0_1_GRAY*jac_affine_inv_0_1_GRAY)*4.0; + const real_t tmp_qloop_25 = (jac_affine_inv_1_0_GRAY*jac_affine_inv_1_0_GRAY)*4.0 + 
(jac_affine_inv_1_1_GRAY*jac_affine_inv_1_1_GRAY)*4.0; + const real_t tmp_qloop_26 = jac_affine_inv_1_0_GRAY*8.0; + const real_t tmp_qloop_27 = jac_affine_inv_1_1_GRAY*8.0; + const real_t tmp_qloop_28 = jac_affine_inv_0_0_GRAY*tmp_qloop_26 + jac_affine_inv_0_1_GRAY*tmp_qloop_27; + const real_t tmp_qloop_29 = jac_affine_inv_1_0_GRAY*tmp_qloop_0 + jac_affine_inv_1_1_GRAY*tmp_qloop_3; + const real_t tmp_qloop_30 = jac_affine_inv_1_0_GRAY*(-tmp_qloop_0 - tmp_qloop_26) + jac_affine_inv_1_1_GRAY*(-tmp_qloop_27 - tmp_qloop_3) - tmp_qloop_29; + const real_t tmp_qloop_31 = jac_affine_inv_0_0_GRAY*(jac_affine_inv_0_0_GRAY*-8.0 - tmp_qloop_1) + jac_affine_inv_0_1_GRAY*(jac_affine_inv_0_1_GRAY*-8.0 - tmp_qloop_4) - tmp_qloop_29; + { + /* FaceType.GRAY */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / 
(micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t diffusivity_times_delta_dof_0 = 
_data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wx_dof_1 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_3 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_4 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wy_dof_0 = 
_data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))]; + const real_t wy_dof_1 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_3 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_7 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_8 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_9 = tmp_qloop_7 + tmp_qloop_8 - 3.0; + const 
real_t tmp_qloop_10 = tmp_qloop_7*_data_q_p_1[q]; + const real_t tmp_qloop_11 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_12 = tmp_qloop_11*2.0; + const real_t tmp_qloop_13 = tmp_qloop_12 - _data_q_p_0[q]; + const real_t tmp_qloop_14 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_15 = tmp_qloop_14*2.0; + const real_t tmp_qloop_16 = tmp_qloop_15 - _data_q_p_1[q]; + const real_t tmp_qloop_17 = -tmp_qloop_10 + tmp_qloop_14*-4.0 + tmp_qloop_8; + const real_t tmp_qloop_18 = -tmp_qloop_10 + tmp_qloop_11*-4.0 + tmp_qloop_7; + const real_t tmp_qloop_19 = tmp_qloop_10 + tmp_qloop_12 + tmp_qloop_15 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_20 = tmp_qloop_10*wx_dof_3 + tmp_qloop_13*wx_dof_1 + tmp_qloop_16*wx_dof_2 + tmp_qloop_17*wx_dof_4 + tmp_qloop_18*wx_dof_5 + tmp_qloop_19*wx_dof_0; + const real_t tmp_qloop_21 = tmp_qloop_10*wy_dof_3 + tmp_qloop_13*wy_dof_1 + tmp_qloop_16*wy_dof_2 + tmp_qloop_17*wy_dof_4 + tmp_qloop_18*wy_dof_5 + tmp_qloop_19*wy_dof_0; + const real_t tmp_qloop_22 = abs_det_jac_affine_GRAY*(diffusivity_times_delta_dof_0*tmp_qloop_19 + diffusivity_times_delta_dof_1*tmp_qloop_13 + diffusivity_times_delta_dof_2*tmp_qloop_16 + diffusivity_times_delta_dof_3*tmp_qloop_10 + diffusivity_times_delta_dof_4*tmp_qloop_17 + diffusivity_times_delta_dof_5*tmp_qloop_18)*_data_q_w[q]; + const real_t tmp_qloop_23 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_0_0_GRAY*tmp_qloop_9 + jac_affine_inv_1_0_GRAY*tmp_qloop_9) + tmp_qloop_21*(jac_affine_inv_0_1_GRAY*tmp_qloop_9 + jac_affine_inv_1_1_GRAY*tmp_qloop_9)); + const real_t tmp_qloop_32 = tmp_qloop_7 - 1.0; + const real_t tmp_qloop_33 = tmp_qloop_22*(jac_affine_inv_0_0_GRAY*tmp_qloop_20*tmp_qloop_32 + jac_affine_inv_0_1_GRAY*tmp_qloop_21*tmp_qloop_32); + const real_t tmp_qloop_34 = tmp_qloop_8 - 1.0; + const real_t tmp_qloop_35 = tmp_qloop_22*(jac_affine_inv_1_0_GRAY*tmp_qloop_20*tmp_qloop_34 + jac_affine_inv_1_1_GRAY*tmp_qloop_21*tmp_qloop_34); + const real_t 
tmp_qloop_36 = jac_affine_inv_1_0_GRAY*tmp_qloop_7; + const real_t tmp_qloop_37 = jac_affine_inv_0_0_GRAY*tmp_qloop_8; + const real_t tmp_qloop_38 = jac_affine_inv_1_1_GRAY*tmp_qloop_7; + const real_t tmp_qloop_39 = jac_affine_inv_0_1_GRAY*tmp_qloop_8; + const real_t tmp_qloop_40 = tmp_qloop_22*(tmp_qloop_20*(tmp_qloop_36 + tmp_qloop_37) + tmp_qloop_21*(tmp_qloop_38 + tmp_qloop_39)); + const real_t tmp_qloop_41 = -tmp_qloop_7 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_42 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_1_0_GRAY*tmp_qloop_41 - tmp_qloop_37) + tmp_qloop_21*(jac_affine_inv_1_1_GRAY*tmp_qloop_41 - tmp_qloop_39)); + const real_t tmp_qloop_43 = -tmp_qloop_8 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_44 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_0_0_GRAY*tmp_qloop_43 - tmp_qloop_36) + tmp_qloop_21*(jac_affine_inv_0_1_GRAY*tmp_qloop_43 - tmp_qloop_38)); + const real_t q_tmp_0_0 = tmp_qloop_23*tmp_qloop_6; + const real_t q_tmp_0_1 = tmp_qloop_23*tmp_qloop_24; + const real_t q_tmp_0_2 = tmp_qloop_23*tmp_qloop_25; + const real_t q_tmp_0_3 = tmp_qloop_23*tmp_qloop_28; + const real_t q_tmp_0_4 = tmp_qloop_23*tmp_qloop_30; + const real_t q_tmp_0_5 = tmp_qloop_23*tmp_qloop_31; + const real_t q_tmp_1_0 = tmp_qloop_33*tmp_qloop_6; + const real_t q_tmp_1_1 = tmp_qloop_24*tmp_qloop_33; + const real_t q_tmp_1_2 = tmp_qloop_25*tmp_qloop_33; + const real_t q_tmp_1_3 = tmp_qloop_28*tmp_qloop_33; + const real_t q_tmp_1_4 = tmp_qloop_30*tmp_qloop_33; + const real_t q_tmp_1_5 = tmp_qloop_31*tmp_qloop_33; + const real_t q_tmp_2_0 = tmp_qloop_35*tmp_qloop_6; + const real_t q_tmp_2_1 = tmp_qloop_24*tmp_qloop_35; + const real_t q_tmp_2_2 = tmp_qloop_25*tmp_qloop_35; + const real_t q_tmp_2_3 = tmp_qloop_28*tmp_qloop_35; + const real_t q_tmp_2_4 = tmp_qloop_30*tmp_qloop_35; + const real_t q_tmp_2_5 = tmp_qloop_31*tmp_qloop_35; + const real_t q_tmp_3_0 = tmp_qloop_40*tmp_qloop_6; + const real_t q_tmp_3_1 = tmp_qloop_24*tmp_qloop_40; + const real_t q_tmp_3_2 = 
tmp_qloop_25*tmp_qloop_40; + const real_t q_tmp_3_3 = tmp_qloop_28*tmp_qloop_40; + const real_t q_tmp_3_4 = tmp_qloop_30*tmp_qloop_40; + const real_t q_tmp_3_5 = tmp_qloop_31*tmp_qloop_40; + const real_t q_tmp_4_0 = tmp_qloop_42*tmp_qloop_6; + const real_t q_tmp_4_1 = tmp_qloop_24*tmp_qloop_42; + const real_t q_tmp_4_2 = tmp_qloop_25*tmp_qloop_42; + const real_t q_tmp_4_3 = tmp_qloop_28*tmp_qloop_42; + const real_t q_tmp_4_4 = tmp_qloop_30*tmp_qloop_42; + const real_t q_tmp_4_5 = tmp_qloop_31*tmp_qloop_42; + const real_t q_tmp_5_0 = tmp_qloop_44*tmp_qloop_6; + const real_t q_tmp_5_1 = tmp_qloop_24*tmp_qloop_44; + const real_t q_tmp_5_2 = tmp_qloop_25*tmp_qloop_44; + const real_t q_tmp_5_3 = tmp_qloop_28*tmp_qloop_44; + const real_t q_tmp_5_4 = tmp_qloop_30*tmp_qloop_44; + const real_t q_tmp_5_5 = tmp_qloop_31*tmp_qloop_44; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = 
q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMat_0_0 = q_acc_0_0; + const real_t elMat_0_1 = q_acc_0_1; + const real_t elMat_0_2 = q_acc_0_2; + const real_t elMat_0_3 = q_acc_0_3; + const real_t elMat_0_4 = q_acc_0_4; + const real_t elMat_0_5 = q_acc_0_5; + const real_t elMat_1_0 = q_acc_1_0; + const real_t elMat_1_1 = q_acc_1_1; + const real_t elMat_1_2 = q_acc_1_2; + const real_t elMat_1_3 = q_acc_1_3; + const real_t elMat_1_4 = q_acc_1_4; + const real_t elMat_1_5 = q_acc_1_5; + const real_t elMat_2_0 = q_acc_2_0; + const real_t elMat_2_1 = q_acc_2_1; + const real_t elMat_2_2 = q_acc_2_2; + const real_t elMat_2_3 = q_acc_2_3; + const real_t elMat_2_4 = q_acc_2_4; + const real_t elMat_2_5 = q_acc_2_5; + const real_t elMat_3_0 = q_acc_3_0; + const real_t elMat_3_1 = q_acc_3_1; + const real_t elMat_3_2 = q_acc_3_2; + const real_t elMat_3_3 = q_acc_3_3; + const real_t elMat_3_4 = q_acc_3_4; + const real_t elMat_3_5 = q_acc_3_5; + const real_t elMat_4_0 = q_acc_4_0; + const real_t elMat_4_1 = q_acc_4_1; + const real_t elMat_4_2 = q_acc_4_2; + const real_t elMat_4_3 = q_acc_4_3; + const real_t elMat_4_4 = q_acc_4_4; + const real_t elMat_4_5 = q_acc_4_5; + const real_t elMat_5_0 = q_acc_5_0; + const real_t elMat_5_1 = q_acc_5_1; + const real_t elMat_5_2 = q_acc_5_2; + const real_t elMat_5_3 = q_acc_5_3; + const real_t elMat_5_4 = q_acc_5_4; + const real_t elMat_5_5 = q_acc_5_5; + + std::vector< uint_t > _data_rowIdx( 6 ); + std::vector< uint_t > _data_colIdx( 6 ); + std::vector< real_t > _data_mat( 36 ); + + _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])); + _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])); + _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 
+ 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])); + _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2))])); + _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])); + _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2))])); + + /* Apply basis transformation */ + + + + _data_mat[0] = ((real_t)(elMat_0_0)); + _data_mat[1] = ((real_t)(elMat_0_1)); + _data_mat[2] = ((real_t)(elMat_0_2)); + _data_mat[3] = ((real_t)(elMat_0_3)); + _data_mat[4] = ((real_t)(elMat_0_4)); + _data_mat[5] = ((real_t)(elMat_0_5)); + _data_mat[6] = ((real_t)(elMat_1_0)); + _data_mat[7] = ((real_t)(elMat_1_1)); + _data_mat[8] = ((real_t)(elMat_1_2)); + _data_mat[9] = ((real_t)(elMat_1_3)); + _data_mat[10] = 
((real_t)(elMat_1_4)); + _data_mat[11] = ((real_t)(elMat_1_5)); + _data_mat[12] = ((real_t)(elMat_2_0)); + _data_mat[13] = ((real_t)(elMat_2_1)); + _data_mat[14] = ((real_t)(elMat_2_2)); + _data_mat[15] = ((real_t)(elMat_2_3)); + _data_mat[16] = ((real_t)(elMat_2_4)); + _data_mat[17] = ((real_t)(elMat_2_5)); + _data_mat[18] = ((real_t)(elMat_3_0)); + _data_mat[19] = ((real_t)(elMat_3_1)); + _data_mat[20] = ((real_t)(elMat_3_2)); + _data_mat[21] = ((real_t)(elMat_3_3)); + _data_mat[22] = ((real_t)(elMat_3_4)); + _data_mat[23] = ((real_t)(elMat_3_5)); + _data_mat[24] = ((real_t)(elMat_4_0)); + _data_mat[25] = ((real_t)(elMat_4_1)); + _data_mat[26] = ((real_t)(elMat_4_2)); + _data_mat[27] = ((real_t)(elMat_4_3)); + _data_mat[28] = ((real_t)(elMat_4_4)); + _data_mat[29] = ((real_t)(elMat_4_5)); + _data_mat[30] = ((real_t)(elMat_5_0)); + _data_mat[31] = ((real_t)(elMat_5_1)); + _data_mat[32] = ((real_t)(elMat_5_2)); + _data_mat[33] = ((real_t)(elMat_5_3)); + _data_mat[34] = ((real_t)(elMat_5_4)); + _data_mat[35] = ((real_t)(elMat_5_5)); + + + mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); + } + } + const real_t tmp_coords_jac_0_BLUE = 1.0 / (micro_edges_per_macro_edge_float)*1.0; + const real_t tmp_coords_jac_1_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0); + const real_t tmp_coords_jac_2_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1); + const real_t tmp_coords_jac_3_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0); + const real_t tmp_coords_jac_4_BLUE = tmp_coords_jac_0_BLUE*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1); + const real_t p_affine_const_0_0_BLUE = tmp_coords_jac_1_BLUE; + const real_t p_affine_const_0_1_BLUE = tmp_coords_jac_2_BLUE; + const real_t p_affine_const_1_0_BLUE = macro_vertex_coord_id_0comp0 + tmp_coords_jac_3_BLUE; + const 
real_t p_affine_const_1_1_BLUE = macro_vertex_coord_id_0comp1 + tmp_coords_jac_4_BLUE; + const real_t p_affine_const_2_0_BLUE = tmp_coords_jac_1_BLUE + tmp_coords_jac_3_BLUE; + const real_t p_affine_const_2_1_BLUE = tmp_coords_jac_2_BLUE + tmp_coords_jac_4_BLUE; + const real_t jac_affine_0_0_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_1_0_BLUE; + const real_t jac_affine_0_1_BLUE = -p_affine_const_0_0_BLUE + p_affine_const_2_0_BLUE; + const real_t jac_affine_1_0_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_1_1_BLUE; + const real_t jac_affine_1_1_BLUE = -p_affine_const_0_1_BLUE + p_affine_const_2_1_BLUE; + const real_t tmp_coords_jac_5_BLUE = jac_affine_0_0_BLUE*jac_affine_1_1_BLUE - jac_affine_0_1_BLUE*jac_affine_1_0_BLUE; + const real_t tmp_coords_jac_6_BLUE = 1.0 / (tmp_coords_jac_5_BLUE); + const real_t jac_affine_inv_0_0_BLUE = jac_affine_1_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_0_1_BLUE = -jac_affine_0_1_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_0_BLUE = -jac_affine_1_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t jac_affine_inv_1_1_BLUE = jac_affine_0_0_BLUE*tmp_coords_jac_6_BLUE; + const real_t abs_det_jac_affine_BLUE = abs(tmp_coords_jac_5_BLUE); + const real_t tmp_moved_constant_0 = jac_affine_inv_0_0_BLUE*4.0; + const real_t tmp_moved_constant_1 = jac_affine_inv_1_0_BLUE*4.0; + const real_t tmp_moved_constant_2 = tmp_moved_constant_0 + tmp_moved_constant_1; + const real_t tmp_moved_constant_3 = jac_affine_inv_0_1_BLUE*4.0; + const real_t tmp_moved_constant_4 = jac_affine_inv_1_1_BLUE*4.0; + const real_t tmp_moved_constant_5 = tmp_moved_constant_3 + tmp_moved_constant_4; + const real_t tmp_moved_constant_6 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_0_1_BLUE*tmp_moved_constant_5 + jac_affine_inv_1_0_BLUE*tmp_moved_constant_2 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_5; + const real_t tmp_moved_constant_7 = (jac_affine_inv_0_0_BLUE*jac_affine_inv_0_0_BLUE)*4.0 + 
(jac_affine_inv_0_1_BLUE*jac_affine_inv_0_1_BLUE)*4.0; + const real_t tmp_moved_constant_8 = (jac_affine_inv_1_0_BLUE*jac_affine_inv_1_0_BLUE)*4.0 + (jac_affine_inv_1_1_BLUE*jac_affine_inv_1_1_BLUE)*4.0; + const real_t tmp_moved_constant_9 = jac_affine_inv_1_0_BLUE*8.0; + const real_t tmp_moved_constant_10 = jac_affine_inv_1_1_BLUE*8.0; + const real_t tmp_moved_constant_11 = jac_affine_inv_0_0_BLUE*tmp_moved_constant_9 + jac_affine_inv_0_1_BLUE*tmp_moved_constant_10; + const real_t tmp_moved_constant_12 = jac_affine_inv_1_0_BLUE*tmp_moved_constant_0 + jac_affine_inv_1_1_BLUE*tmp_moved_constant_3; + const real_t tmp_moved_constant_13 = jac_affine_inv_1_0_BLUE*(-tmp_moved_constant_0 - tmp_moved_constant_9) + jac_affine_inv_1_1_BLUE*(-tmp_moved_constant_10 - tmp_moved_constant_3) - tmp_moved_constant_12; + const real_t tmp_moved_constant_14 = jac_affine_inv_0_0_BLUE*(jac_affine_inv_0_0_BLUE*-8.0 - tmp_moved_constant_1) + jac_affine_inv_0_1_BLUE*(jac_affine_inv_0_1_BLUE*-8.0 - tmp_moved_constant_4) - tmp_moved_constant_12; + { + /* FaceType.BLUE */ + for (int64_t ctr_1 = 0; ctr_1 < micro_edges_per_macro_edge; ctr_1 += 1) + for (int64_t ctr_0 = 0; ctr_0 < -ctr_1 + micro_edges_per_macro_edge - 1; ctr_0 += 1) + { + + const int64_t phantom_ctr_0 = ctr_0; + real_t _data_float_loop_ctr_array_dim_0[4]; + _data_float_loop_ctr_array_dim_0[0] = (real_t) ctr_0+ 0; + _data_float_loop_ctr_array_dim_0[1] = (real_t) ctr_0+ 1; + _data_float_loop_ctr_array_dim_0[2] = (real_t) ctr_0+ 2; + _data_float_loop_ctr_array_dim_0[3] = (real_t) ctr_0+ 3; + real_t _data_float_loop_ctr_array_dim_1[4]; + _data_float_loop_ctr_array_dim_1[0] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[1] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[2] = (real_t) ctr_1; + _data_float_loop_ctr_array_dim_1[3] = (real_t) ctr_1; + + const real_t p_affine_0_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + 
_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_0_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*1.0*_data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0]; + const real_t p_affine_1_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_1_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_1comp1)*1.0*_data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0] + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_0 = macro_vertex_coord_id_0comp0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_1comp0)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp0 + macro_vertex_coord_id_2comp0)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t p_affine_2_1 = macro_vertex_coord_id_0comp1 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + 
macro_vertex_coord_id_1comp1)*(1.0 + _data_float_loop_ctr_array_dim_0[ctr_0 - phantom_ctr_0])*1.0 + 1.0 / (micro_edges_per_macro_edge_float)*(-macro_vertex_coord_id_0comp1 + macro_vertex_coord_id_2comp1)*(1.0 + _data_float_loop_ctr_array_dim_1[ctr_0 - phantom_ctr_0])*1.0; + const real_t diffusivity_times_delta_dof_0 = _data_diffusivity_times_deltaVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_1 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_2 = _data_diffusivity_times_deltaVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_3 = _data_diffusivity_times_deltaEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t diffusivity_times_delta_dof_4 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t diffusivity_times_delta_dof_5 = _data_diffusivity_times_deltaEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wx_dof_0 = _data_wxVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wx_dof_1 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_2 = _data_wxVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t wx_dof_3 = _data_wxEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wx_dof_4 = _data_wxEdge[ctr_0 
+ ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t wx_dof_5 = _data_wxEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + const real_t wy_dof_0 = _data_wyVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1]; + const real_t wy_dof_1 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_2 = _data_wyVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1]; + const real_t wy_dof_3 = _data_wyEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))]; + const real_t wy_dof_4 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1]; + const real_t wy_dof_5 = _data_wyEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))]; + real_t q_acc_0_0 = 0.0; + real_t q_acc_0_1 = 0.0; + real_t q_acc_0_2 = 0.0; + real_t q_acc_0_3 = 0.0; + real_t q_acc_0_4 = 0.0; + real_t q_acc_0_5 = 0.0; + real_t q_acc_1_0 = 0.0; + real_t q_acc_1_1 = 0.0; + real_t q_acc_1_2 = 0.0; + real_t q_acc_1_3 = 0.0; + real_t q_acc_1_4 = 0.0; + real_t q_acc_1_5 = 0.0; + real_t q_acc_2_0 = 0.0; + real_t q_acc_2_1 = 0.0; + real_t q_acc_2_2 = 0.0; + real_t q_acc_2_3 = 0.0; + real_t q_acc_2_4 = 0.0; + real_t q_acc_2_5 = 0.0; + real_t q_acc_3_0 = 0.0; + real_t q_acc_3_1 = 0.0; + real_t q_acc_3_2 = 0.0; + real_t q_acc_3_3 = 0.0; + real_t q_acc_3_4 = 0.0; + real_t q_acc_3_5 = 0.0; + real_t q_acc_4_0 = 0.0; + real_t q_acc_4_1 = 0.0; + real_t q_acc_4_2 = 0.0; + real_t q_acc_4_3 = 0.0; + real_t q_acc_4_4 = 
0.0; + real_t q_acc_4_5 = 0.0; + real_t q_acc_5_0 = 0.0; + real_t q_acc_5_1 = 0.0; + real_t q_acc_5_2 = 0.0; + real_t q_acc_5_3 = 0.0; + real_t q_acc_5_4 = 0.0; + real_t q_acc_5_5 = 0.0; + for (int64_t q = 0; q < 4; q += 1) + { + const real_t tmp_qloop_7 = 4.0*_data_q_p_0[q]; + const real_t tmp_qloop_8 = 4.0*_data_q_p_1[q]; + const real_t tmp_qloop_9 = tmp_qloop_7 + tmp_qloop_8 - 3.0; + const real_t tmp_qloop_10 = tmp_qloop_7*_data_q_p_1[q]; + const real_t tmp_qloop_11 = (_data_q_p_0[q]*_data_q_p_0[q]); + const real_t tmp_qloop_12 = tmp_qloop_11*2.0; + const real_t tmp_qloop_13 = tmp_qloop_12 - _data_q_p_0[q]; + const real_t tmp_qloop_14 = (_data_q_p_1[q]*_data_q_p_1[q]); + const real_t tmp_qloop_15 = tmp_qloop_14*2.0; + const real_t tmp_qloop_16 = tmp_qloop_15 - _data_q_p_1[q]; + const real_t tmp_qloop_17 = -tmp_qloop_10 + tmp_qloop_14*-4.0 + tmp_qloop_8; + const real_t tmp_qloop_18 = -tmp_qloop_10 + tmp_qloop_11*-4.0 + tmp_qloop_7; + const real_t tmp_qloop_19 = tmp_qloop_10 + tmp_qloop_12 + tmp_qloop_15 - 3.0*_data_q_p_0[q] - 3.0*_data_q_p_1[q] + 1.0; + const real_t tmp_qloop_20 = tmp_qloop_10*wx_dof_3 + tmp_qloop_13*wx_dof_1 + tmp_qloop_16*wx_dof_2 + tmp_qloop_17*wx_dof_4 + tmp_qloop_18*wx_dof_5 + tmp_qloop_19*wx_dof_0; + const real_t tmp_qloop_21 = tmp_qloop_10*wy_dof_3 + tmp_qloop_13*wy_dof_1 + tmp_qloop_16*wy_dof_2 + tmp_qloop_17*wy_dof_4 + tmp_qloop_18*wy_dof_5 + tmp_qloop_19*wy_dof_0; + const real_t tmp_qloop_22 = abs_det_jac_affine_BLUE*(diffusivity_times_delta_dof_0*tmp_qloop_19 + diffusivity_times_delta_dof_1*tmp_qloop_13 + diffusivity_times_delta_dof_2*tmp_qloop_16 + diffusivity_times_delta_dof_3*tmp_qloop_10 + diffusivity_times_delta_dof_4*tmp_qloop_17 + diffusivity_times_delta_dof_5*tmp_qloop_18)*_data_q_w[q]; + const real_t tmp_qloop_23 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_0_0_BLUE*tmp_qloop_9 + jac_affine_inv_1_0_BLUE*tmp_qloop_9) + tmp_qloop_21*(jac_affine_inv_0_1_BLUE*tmp_qloop_9 + jac_affine_inv_1_1_BLUE*tmp_qloop_9)); + const real_t 
tmp_qloop_32 = tmp_qloop_7 - 1.0; + const real_t tmp_qloop_33 = tmp_qloop_22*(jac_affine_inv_0_0_BLUE*tmp_qloop_20*tmp_qloop_32 + jac_affine_inv_0_1_BLUE*tmp_qloop_21*tmp_qloop_32); + const real_t tmp_qloop_34 = tmp_qloop_8 - 1.0; + const real_t tmp_qloop_35 = tmp_qloop_22*(jac_affine_inv_1_0_BLUE*tmp_qloop_20*tmp_qloop_34 + jac_affine_inv_1_1_BLUE*tmp_qloop_21*tmp_qloop_34); + const real_t tmp_qloop_36 = jac_affine_inv_1_0_BLUE*tmp_qloop_7; + const real_t tmp_qloop_37 = jac_affine_inv_0_0_BLUE*tmp_qloop_8; + const real_t tmp_qloop_38 = jac_affine_inv_1_1_BLUE*tmp_qloop_7; + const real_t tmp_qloop_39 = jac_affine_inv_0_1_BLUE*tmp_qloop_8; + const real_t tmp_qloop_40 = tmp_qloop_22*(tmp_qloop_20*(tmp_qloop_36 + tmp_qloop_37) + tmp_qloop_21*(tmp_qloop_38 + tmp_qloop_39)); + const real_t tmp_qloop_41 = -tmp_qloop_7 - 8.0*_data_q_p_1[q] + 4.0; + const real_t tmp_qloop_42 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_1_0_BLUE*tmp_qloop_41 - tmp_qloop_37) + tmp_qloop_21*(jac_affine_inv_1_1_BLUE*tmp_qloop_41 - tmp_qloop_39)); + const real_t tmp_qloop_43 = -tmp_qloop_8 - 8.0*_data_q_p_0[q] + 4.0; + const real_t tmp_qloop_44 = tmp_qloop_22*(tmp_qloop_20*(jac_affine_inv_0_0_BLUE*tmp_qloop_43 - tmp_qloop_36) + tmp_qloop_21*(jac_affine_inv_0_1_BLUE*tmp_qloop_43 - tmp_qloop_38)); + const real_t q_tmp_0_0 = tmp_moved_constant_6*tmp_qloop_23; + const real_t q_tmp_0_1 = tmp_moved_constant_7*tmp_qloop_23; + const real_t q_tmp_0_2 = tmp_moved_constant_8*tmp_qloop_23; + const real_t q_tmp_0_3 = tmp_moved_constant_11*tmp_qloop_23; + const real_t q_tmp_0_4 = tmp_moved_constant_13*tmp_qloop_23; + const real_t q_tmp_0_5 = tmp_moved_constant_14*tmp_qloop_23; + const real_t q_tmp_1_0 = tmp_moved_constant_6*tmp_qloop_33; + const real_t q_tmp_1_1 = tmp_moved_constant_7*tmp_qloop_33; + const real_t q_tmp_1_2 = tmp_moved_constant_8*tmp_qloop_33; + const real_t q_tmp_1_3 = tmp_moved_constant_11*tmp_qloop_33; + const real_t q_tmp_1_4 = tmp_moved_constant_13*tmp_qloop_33; + const real_t q_tmp_1_5 
= tmp_moved_constant_14*tmp_qloop_33; + const real_t q_tmp_2_0 = tmp_moved_constant_6*tmp_qloop_35; + const real_t q_tmp_2_1 = tmp_moved_constant_7*tmp_qloop_35; + const real_t q_tmp_2_2 = tmp_moved_constant_8*tmp_qloop_35; + const real_t q_tmp_2_3 = tmp_moved_constant_11*tmp_qloop_35; + const real_t q_tmp_2_4 = tmp_moved_constant_13*tmp_qloop_35; + const real_t q_tmp_2_5 = tmp_moved_constant_14*tmp_qloop_35; + const real_t q_tmp_3_0 = tmp_moved_constant_6*tmp_qloop_40; + const real_t q_tmp_3_1 = tmp_moved_constant_7*tmp_qloop_40; + const real_t q_tmp_3_2 = tmp_moved_constant_8*tmp_qloop_40; + const real_t q_tmp_3_3 = tmp_moved_constant_11*tmp_qloop_40; + const real_t q_tmp_3_4 = tmp_moved_constant_13*tmp_qloop_40; + const real_t q_tmp_3_5 = tmp_moved_constant_14*tmp_qloop_40; + const real_t q_tmp_4_0 = tmp_moved_constant_6*tmp_qloop_42; + const real_t q_tmp_4_1 = tmp_moved_constant_7*tmp_qloop_42; + const real_t q_tmp_4_2 = tmp_moved_constant_8*tmp_qloop_42; + const real_t q_tmp_4_3 = tmp_moved_constant_11*tmp_qloop_42; + const real_t q_tmp_4_4 = tmp_moved_constant_13*tmp_qloop_42; + const real_t q_tmp_4_5 = tmp_moved_constant_14*tmp_qloop_42; + const real_t q_tmp_5_0 = tmp_moved_constant_6*tmp_qloop_44; + const real_t q_tmp_5_1 = tmp_moved_constant_7*tmp_qloop_44; + const real_t q_tmp_5_2 = tmp_moved_constant_8*tmp_qloop_44; + const real_t q_tmp_5_3 = tmp_moved_constant_11*tmp_qloop_44; + const real_t q_tmp_5_4 = tmp_moved_constant_13*tmp_qloop_44; + const real_t q_tmp_5_5 = tmp_moved_constant_14*tmp_qloop_44; + q_acc_0_0 = q_acc_0_0 + q_tmp_0_0; + q_acc_0_1 = q_acc_0_1 + q_tmp_0_1; + q_acc_0_2 = q_acc_0_2 + q_tmp_0_2; + q_acc_0_3 = q_acc_0_3 + q_tmp_0_3; + q_acc_0_4 = q_acc_0_4 + q_tmp_0_4; + q_acc_0_5 = q_acc_0_5 + q_tmp_0_5; + q_acc_1_0 = q_acc_1_0 + q_tmp_1_0; + q_acc_1_1 = q_acc_1_1 + q_tmp_1_1; + q_acc_1_2 = q_acc_1_2 + q_tmp_1_2; + q_acc_1_3 = q_acc_1_3 + q_tmp_1_3; + q_acc_1_4 = q_acc_1_4 + q_tmp_1_4; + q_acc_1_5 = q_acc_1_5 + q_tmp_1_5; + q_acc_2_0 = 
q_acc_2_0 + q_tmp_2_0; + q_acc_2_1 = q_acc_2_1 + q_tmp_2_1; + q_acc_2_2 = q_acc_2_2 + q_tmp_2_2; + q_acc_2_3 = q_acc_2_3 + q_tmp_2_3; + q_acc_2_4 = q_acc_2_4 + q_tmp_2_4; + q_acc_2_5 = q_acc_2_5 + q_tmp_2_5; + q_acc_3_0 = q_acc_3_0 + q_tmp_3_0; + q_acc_3_1 = q_acc_3_1 + q_tmp_3_1; + q_acc_3_2 = q_acc_3_2 + q_tmp_3_2; + q_acc_3_3 = q_acc_3_3 + q_tmp_3_3; + q_acc_3_4 = q_acc_3_4 + q_tmp_3_4; + q_acc_3_5 = q_acc_3_5 + q_tmp_3_5; + q_acc_4_0 = q_acc_4_0 + q_tmp_4_0; + q_acc_4_1 = q_acc_4_1 + q_tmp_4_1; + q_acc_4_2 = q_acc_4_2 + q_tmp_4_2; + q_acc_4_3 = q_acc_4_3 + q_tmp_4_3; + q_acc_4_4 = q_acc_4_4 + q_tmp_4_4; + q_acc_4_5 = q_acc_4_5 + q_tmp_4_5; + q_acc_5_0 = q_acc_5_0 + q_tmp_5_0; + q_acc_5_1 = q_acc_5_1 + q_tmp_5_1; + q_acc_5_2 = q_acc_5_2 + q_tmp_5_2; + q_acc_5_3 = q_acc_5_3 + q_tmp_5_3; + q_acc_5_4 = q_acc_5_4 + q_tmp_5_4; + q_acc_5_5 = q_acc_5_5 + q_tmp_5_5; + } + const real_t elMat_0_0 = q_acc_0_0; + const real_t elMat_0_1 = q_acc_0_1; + const real_t elMat_0_2 = q_acc_0_2; + const real_t elMat_0_3 = q_acc_0_3; + const real_t elMat_0_4 = q_acc_0_4; + const real_t elMat_0_5 = q_acc_0_5; + const real_t elMat_1_0 = q_acc_1_0; + const real_t elMat_1_1 = q_acc_1_1; + const real_t elMat_1_2 = q_acc_1_2; + const real_t elMat_1_3 = q_acc_1_3; + const real_t elMat_1_4 = q_acc_1_4; + const real_t elMat_1_5 = q_acc_1_5; + const real_t elMat_2_0 = q_acc_2_0; + const real_t elMat_2_1 = q_acc_2_1; + const real_t elMat_2_2 = q_acc_2_2; + const real_t elMat_2_3 = q_acc_2_3; + const real_t elMat_2_4 = q_acc_2_4; + const real_t elMat_2_5 = q_acc_2_5; + const real_t elMat_3_0 = q_acc_3_0; + const real_t elMat_3_1 = q_acc_3_1; + const real_t elMat_3_2 = q_acc_3_2; + const real_t elMat_3_3 = q_acc_3_3; + const real_t elMat_3_4 = q_acc_3_4; + const real_t elMat_3_5 = q_acc_3_5; + const real_t elMat_4_0 = q_acc_4_0; + const real_t elMat_4_1 = q_acc_4_1; + const real_t elMat_4_2 = q_acc_4_2; + const real_t elMat_4_3 = q_acc_4_3; + const real_t elMat_4_4 = q_acc_4_4; + const real_t 
elMat_4_5 = q_acc_4_5; + const real_t elMat_5_0 = q_acc_5_0; + const real_t elMat_5_1 = q_acc_5_1; + const real_t elMat_5_2 = q_acc_5_2; + const real_t elMat_5_3 = q_acc_5_3; + const real_t elMat_5_4 = q_acc_5_4; + const real_t elMat_5_5 = q_acc_5_5; + + std::vector< uint_t > _data_rowIdx( 6 ); + std::vector< uint_t > _data_colIdx( 6 ); + std::vector< real_t > _data_mat( 36 ); + + _data_rowIdx[0] = ((uint64_t)(_data_dstVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])); + _data_rowIdx[1] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_rowIdx[2] = ((uint64_t)(_data_dstVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])); + _data_rowIdx[3] = ((uint64_t)(_data_dstEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_rowIdx[4] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])); + _data_rowIdx[5] = ((uint64_t)(_data_dstEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + _data_colIdx[0] = ((uint64_t)(_data_srcVertex[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 2) - ((ctr_1*(ctr_1 + 1)) / (2)) + 1])); + _data_colIdx[1] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_colIdx[2] = ((uint64_t)(_data_srcVertex[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 2) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2)) + 1])); + _data_colIdx[3] = ((uint64_t)(_data_srcEdge[ctr_0 + (ctr_1 + 1)*(micro_edges_per_macro_edge + 1) - (((ctr_1 + 1)*(ctr_1 + 2)) / (2))])); + _data_colIdx[4] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - 
((ctr_1*(ctr_1 + 1)) / (2)) + 2*((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2)) + 1])); + _data_colIdx[5] = ((uint64_t)(_data_srcEdge[ctr_0 + ctr_1*(micro_edges_per_macro_edge + 1) - ((ctr_1*(ctr_1 + 1)) / (2)) + ((micro_edges_per_macro_edge*(micro_edges_per_macro_edge + 1)) / (2))])); + + /* Apply basis transformation */ + + + + _data_mat[0] = ((real_t)(elMat_0_0)); + _data_mat[1] = ((real_t)(elMat_0_1)); + _data_mat[2] = ((real_t)(elMat_0_2)); + _data_mat[3] = ((real_t)(elMat_0_3)); + _data_mat[4] = ((real_t)(elMat_0_4)); + _data_mat[5] = ((real_t)(elMat_0_5)); + _data_mat[6] = ((real_t)(elMat_1_0)); + _data_mat[7] = ((real_t)(elMat_1_1)); + _data_mat[8] = ((real_t)(elMat_1_2)); + _data_mat[9] = ((real_t)(elMat_1_3)); + _data_mat[10] = ((real_t)(elMat_1_4)); + _data_mat[11] = ((real_t)(elMat_1_5)); + _data_mat[12] = ((real_t)(elMat_2_0)); + _data_mat[13] = ((real_t)(elMat_2_1)); + _data_mat[14] = ((real_t)(elMat_2_2)); + _data_mat[15] = ((real_t)(elMat_2_3)); + _data_mat[16] = ((real_t)(elMat_2_4)); + _data_mat[17] = ((real_t)(elMat_2_5)); + _data_mat[18] = ((real_t)(elMat_3_0)); + _data_mat[19] = ((real_t)(elMat_3_1)); + _data_mat[20] = ((real_t)(elMat_3_2)); + _data_mat[21] = ((real_t)(elMat_3_3)); + _data_mat[22] = ((real_t)(elMat_3_4)); + _data_mat[23] = ((real_t)(elMat_3_5)); + _data_mat[24] = ((real_t)(elMat_4_0)); + _data_mat[25] = ((real_t)(elMat_4_1)); + _data_mat[26] = ((real_t)(elMat_4_2)); + _data_mat[27] = ((real_t)(elMat_4_3)); + _data_mat[28] = ((real_t)(elMat_4_4)); + _data_mat[29] = ((real_t)(elMat_4_5)); + _data_mat[30] = ((real_t)(elMat_5_0)); + _data_mat[31] = ((real_t)(elMat_5_1)); + _data_mat[32] = ((real_t)(elMat_5_2)); + _data_mat[33] = ((real_t)(elMat_5_3)); + _data_mat[34] = ((real_t)(elMat_5_4)); + _data_mat[35] = ((real_t)(elMat_5_5)); + + + mat->addValues( _data_rowIdx, _data_colIdx, _data_mat ); + } + } + } +} +} // namespace operatorgeneration + +} // namespace hyteg